Identifying heterogeneous treatment effects for online single-session interventions for adolescent depression: a secondary analysis
Note: Press the button on the upper right of this file to show all codes.
Report: Download PART_III.pdf. Git repo can be found here
1 Data preparation
First, load dataset and clean data as necessary.
## read and create subsets
cope_subset <- read_csv("data/cope_subset.csv")
cope_subset %>% datatable(
rownames = FALSE,
options = list(
pageLength = 5,
columnDefs = list(list(className = 'dt-center',
targets = 0:4))))## gender identity subset
dem_gender_subset <- cope_subset %>% dplyr::select(starts_with("b_dem_gender"))
## coping strategies
dem_cope_subset <- cope_subset %>% dplyr::select(starts_with("b_covid_cope"))
## family challenges
dem_family_subset <- cope_subset %>% dplyr::select(starts_with("b_covid_family"))
## race
dem_race_subset <- cope_subset %>% dplyr::select(starts_with("b_dem_race"))
## sexual orientation
dem_orient_subset <- cope_subset %>% dplyr::select(b_dem_orientation)
## coping strategy subset
dem_cope_subset <- dem_cope_subset %>%
dplyr::select(-"b_covid_cope_1_including_talking_with_people_you_trust_about_your_concerns_and_how_you_are_feeling")1.1 Collapse categories
Re-code sexual orientation, race, family challenge variables by collapsing some categories with limited number of samples, and generating composite categories.
1.1.0.1 Recode sexual orientation
## recode sexual orientation
recode_orient_dat <- dem_orient_subset %>%
mutate(recod_orient= case_when(
b_dem_orientation %in% c("Asexual", "I do not use a label", "Other/Not listed (please specify)") ~ "Other",
b_dem_orientation %in% c("Gay/Lesbian/Homosexual", "Queer", "Unsure/Questioning") ~ "LGBTQ",
b_dem_orientation %in% c("Bisexual", "Pansexual") ~ "LGBTQ",
b_dem_orientation == "Heterosexual/Straight" ~ "Heterosexual",
TRUE ~ b_dem_orientation
))1.1.0.2 Recode family challenges
## recode family challenge
### number of challenges
challenge_num_dat <- dem_family_subset %>%
mutate(challenge_num = apply(., 1, sum)) %>%
mutate(challenge_num = ifelse(b_covid_family_the_covid_19_pandemic_has_not_affected_me_or_my_family_in_these_ways_in_the_past_2_weeks=="1",0,challenge_num)) %>%
mutate(challenge_num3 = ifelse(challenge_num>=2, 2, challenge_num))
### category of challenges
challenge_cat_dat <- dem_family_subset %>%
mutate(challenge_cat = case_when(
b_covid_family_family_did_not_enough_enough_money_for_food == 1 |
b_covid_family_family_did_not_have_enough_money_for_gas_transportation == 1 |
b_covid_family_family_did_not_have_a_regular_place_to_sleep_or_stay==1 |
b_covid_family_family_did_not_have_enough_money_to_pay_rent == 1 ~ "Financial",
b_covid_family_i_could_not_attend_school_in_person == 1 |
b_covid_family_i_could_not_attend_school_at_all == 1 ~ "School",
b_covid_family_other == 1 ~ "Other",
b_covid_family_the_covid_19_pandemic_has_not_affected_me_or_my_family_in_these_ways_in_the_past_2_weeks == 1 ~ "No impact",
TRUE ~ "No"
))
challenge_cat_dat <- dem_family_subset %>%
mutate(
Financial = b_covid_family_family_did_not_enough_enough_money_for_food == 1 |
b_covid_family_family_did_not_have_enough_money_for_gas_transportation == 1 |
b_covid_family_family_did_not_have_a_regular_place_to_sleep_or_stay == 1 |
b_covid_family_family_did_not_have_enough_money_to_pay_rent == 1,
School = b_covid_family_i_could_not_attend_school_in_person == 1 |
b_covid_family_i_could_not_attend_school_at_all == 1,
Other = b_covid_family_other == 1,
No_impact = b_covid_family_the_covid_19_pandemic_has_not_affected_me_or_my_family_in_these_ways_in_the_past_2_weeks == 1,
challenge_cat = case_when(
Financial & School & Other ~ "Other",
Financial & School ~ "Other",
Financial & Other ~ "Other",
School & Other ~ "Other",
Financial ~ "Other",
School ~ "School",
Other ~ "Other",
No_impact ~ "No impact",
TRUE ~ "No challenge"
)
) %>%
dplyr::select(-Financial, -School, -Other, -No_impact)1.1.0.3 Recode race
## identify all race patterns
race_pattern_dat <- cope_subset %>%
dplyr::select(starts_with("b_dem_race")) %>%
mutate(race_pattern = apply(., 1, function(row) paste0(row, collapse = "")))
cope_subset1 <- cope_subset %>%
mutate(race_pattern =race_pattern_dat$race_pattern ) %>%
## recode race
mutate(recode_race = case_when(
race_pattern == "000010" ~ 'White',
race_pattern == "000001" ~ 'Black/African-American',
race_pattern == "010000" ~ 'Asian Including Asian Desi',
race_pattern == "001000" ~ 'Hispanic/Latinx',
race_pattern == "000000" ~ 'Prefer not to answer',
TRUE ~ 'Mixed'
)) %>%
## recode sexual orientation
mutate(recode_orient = recode_orient_dat$recod_orient) %>%
dplyr::select(-race_pattern) %>%
## recode language
mutate(recode_language = case_when(
b_dem_language == "English" ~ 'English',
TRUE ~ 'Other'
)) %>%
## reorder variables
dplyr::select(-b_dem_language,-starts_with("b_dem_race")) %>%
dplyr::select(recode_race,starts_with("b_dem_gender"),b_screener_age,b_dem_sex,b_dem_orientation,
starts_with("b_covid_family"), starts_with("b_covid_cope"),recode_language, everything()
)1.2 Latent Class Analysis(LCA)
Apply LCA to find the latent class variables for gender identity and coping strategies, respectively.
1.2.1 Gender identity
## Gender identity
tmp <- dem_gender_subset%>%
mutate(gender_pattern = apply(., 1, function(row) paste0(row, collapse = "")))
## check patterns
tmp1 <- tmp$gender_pattern %>% table() %>% data.frame()
genderid_dat <- dem_gender_subset
## check correlation
genderid_dat_cor <- cor(genderid_dat) %>% data.frame
# The transgender indicator and gender_expansive indicator variables are removed because these two variables are the
# composite variables of other indicators
### correlation matrix
#genderid_dat_cor_melted <- melt(as.matrix(genderid_dat_cor))
# ggplot(data = genderid_dat_cor_melted, aes(x=Var1, y=Var2, fill=value)) +
# geom_tile(color = "white") +
# scale_fill_gradient2(low = "blue", high = "red", mid = "white",
# midpoint = 0, limit = c(-1, 1), space = "Lab",
# name="Correlation") +
# theme_minimal() +
# theme(axis.text.x = element_text(angle = 45, vjust = 1,
# size = 12, hjust = 1)) +
# coord_fixed() +
# labs(x = "", y = "") +
# ggtitle("Correlation Matrix Heatmap")
## prepare input data for poLCA
## only accept indicator variables coded start from 1 (transfer 01 --> 12)
recode_variables <- function(df) {
df <- df %>% mutate(across(everything(), ~ as.integer(as.factor(.))))
return(df)
}
## input of the poLCA
genderid_dat_LCA <- genderid_dat %>%
recode_variables() %>%
rename_with(~ str_remove(., "b_dem_gender_"), starts_with("b_dem_gender_"))
## define the indicator variables of LCA model
f1 <- cbind(
agender, not_sure, other_please_specify,
androgynous, nonbinary, two_spirited,
female_to_male_transgender_ftm, trans_female_trans_feminine,
trans_male_trans_masculine,
# gender_expansive, ## composite removed
third_gender, genderqueer, male_to_female_transgender_mtf,
man_boy,
# transgender, ## composite removed
woman_girl) ~ 1
set.seed(1017)
gender_LCA2 <- poLCA(f1, data = genderid_dat_LCA,
nclass = 2)
gender_LCA3 <- poLCA(f1, data = genderid_dat_LCA,
nclass = 3)
gender_LCA4 <- poLCA(f1, data = genderid_dat_LCA,
nclass = 4)
gender_LCA5 <- poLCA(f1, data = genderid_dat_LCA,
nclass = 5)
gender_models <- list(gender_LCA2, gender_LCA3, gender_LCA4, gender_LCA5)
gender_model_stats <- data.frame(
Model = 2:5,
G2 = sapply(gender_models, function(x) x$Gsq),
AIC = sapply(gender_models, function(x) x$aic),
BIC = sapply(gender_models, function(x) x$bic)
)1.2.1.1 Step 1: LCA model selection
Compare LCA models for different number of classes.
the LCA model with 3 latent classes has comparable \(G^2\), AIC, BIC. (could also use scree plot). therefore, a model with 3 latent class is selected for gender identity.
kable(gender_model_stats, caption = "Model Comparison for Different Number of Classes (Gender Identity variable)")| Model | G2 | AIC | BIC |
|---|---|---|---|
| 2 | 1348.0556 | 7529.195 | 7683.297 |
| 3 | 924.7536 | 7135.893 | 7369.704 |
| 4 | 821.6347 | 7062.774 | 7376.293 |
| 5 | 616.5568 | 6887.696 | 7280.923 |
1.2.1.2 Step 2: classification error: averaged posterior probability(APP)
print APP, results suggest low classification error(APP>0.7).
posterior_probs <- gender_LCA3$posterior
assigned_class <- apply(posterior_probs, 1, which.max)
mean_posterior_by_class <- numeric(ncol(posterior_probs))
for (k in 1:ncol(posterior_probs)) {
mean_posterior_by_class[k] <- mean(posterior_probs[assigned_class == k, k])
}
print(mean_posterior_by_class)## [1] 0.9648380 0.9863283 0.9650341
1.2.1.3 Step 3: Final latent class probabilities
The probability of belonging to a specific class is calculated for each individual, and each individual is classified to a specific class based on the max. posterior probability. Below is the results of the posterior probabilities for all subjects (prob. of being in class \(j\)).
## get predicted class (gender identity)
genderid_class <- gender_LCA3$predclass
genderid_predprob <- gender_LCA3$posterior
colnames(genderid_predprob) <- paste0("Class",c(1:3))
genderid_predprob %>% datatable(
rownames = FALSE,
options = list(
pageLength = 5,
columnDefs = list(list(className = 'dt-center',
targets = 0:2))))1.2.1.4 Step 4: Assign label to each latent class (gener identity)
Visualize the probabilities of answering yes of each item by latent class (Pr(individual answers yes to an item \(|\) in class \(j\))) to understand the underlying pattern.
# probabilities of answering yes by class
probs_list <- gender_LCA3$probs
probs_df <- do.call(rbind, lapply(names(probs_list), function(var) {
prob_df <- as.data.frame(probs_list[[var]])
prob_df$variable <- var
prob_df$class <- rownames(prob_df)
prob_df
})) %>% dplyr::select(-`Pr(1)`)
probs_df_list <- list()
for (var in names(probs_list)) {
prob_df <- as.data.frame(probs_list[[var]])
prob_df$variable <- var
prob_df$class <- rownames(prob_df)
probs_df_list[[var]] <- prob_df
}
probs_df_combined <- bind_rows(probs_df_list)%>%
dplyr::select(-`Pr(1)`) %>%
mutate(variable = dplyr::recode(variable,
"agender" = "agender",
"not_sure" = "not sure",
"other_please_specify" = "other",
"androgynous" = "androgynous",
"nonbinary" = "nonbinary",
"two_spirited" = "two spirited",
"female_to_male_transgender_ftm" = "female to male",
"trans_female_trans_feminine" = "female trans feminine",
"trans_male_trans_masculine" = "male trans masculine",
"third_gender" = "third gender",
"genderqueer" = "genderqueer",
"male_to_female_transgender_mtf" = "male to female",
"man_boy" = "man/boy",
"woman_girl" = "woman/girl"
)) %>%
mutate(class = dplyr::recode(class,
"class 1: " = "Class 1: Non-binary",
"class 2: " = "Class 2: Women/girls",
"class 3: " = "Class 3: Male/Masculine")
) %>%
group_by(variable) %>%
mutate(maxprob = max(`Pr(2)`)) %>%
ungroup() %>%
mutate(maxprob = ifelse(maxprob==`Pr(2)`,maxprob,NA) %>% round(2),
maxprob = ifelse(maxprob %in% c(0.01,0.02,0.03,0.12,0.23,0.2,0.19,0.09,0.38),NA,maxprob)
)
## the condit. prob
gender_prob_plot <- ggplot(probs_df_combined, aes(x = variable, y = `Pr(2)`, color = class, group = class)) +
geom_line(size = 1) +
geom_point(size = 1.5)+
geom_text(aes(label = round(maxprob, 2)), vjust = -1, size = 3, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12)) +
labs(x = "", y = "Probability of answering yes ") +
ggtitle("(A) Latent classes for gender identity")+
scale_color_brewer(palette = "Set1") +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(hjust = 0.5, size = 12, face = "bold"),
axis.title.y = element_text(size = 12),
legend.title = element_blank(),
legend.position = "bottom",
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
)
gender_prob_plot1.2.2 Coping strategies
1.2.2.1 Step 1: LCA model selection
Build and compare LCA models
cope_dat_LCA <- dem_cope_subset %>%
recode_variables() %>%
rename_with(~ str_remove(., "b_covid_cope_1_"), starts_with("b_covid_cope_1_"))
f2 <- cbind(connecting_with_others, contacting_a_healthcare_provider,
drinking_alcohol, smoking_more_cigarettes_or_vaping_more) ~ 1
set.seed(1017)
cope_LCA2 <- poLCA(f2, data = cope_dat_LCA,
nclass = 2)
cope_LCA3 <- poLCA(f2, data = cope_dat_LCA,
nclass = 3)
cope_LCA4 <- poLCA(f2, data = cope_dat_LCA,
nclass = 4)
cope_LCA5 <- poLCA(f2, data = cope_dat_LCA,
nclass = 5)
cope_models <- list(cope_LCA2, cope_LCA3, cope_LCA4, cope_LCA5)
cope_model_stats <- data.frame(
Model = 2:5,
G2 = sapply(cope_models, function(x) x$Gsq),
AIC = sapply(cope_models, function(x) x$aic),
BIC = sapply(cope_models, function(x) x$bic)
)kable(cope_model_stats, caption = "Model Comparison for Different Number of Classes (Coping strategy variable)")| Model | G2 | AIC | BIC |
|---|---|---|---|
| 2 | 29.066630 | 4548.577 | 4596.402 |
| 3 | 6.162343 | 4535.673 | 4610.067 |
| 4 | 6.058262 | 4545.569 | 4646.533 |
| 5 | 1.878940 | 4551.389 | 4678.923 |
1.2.2.2 Step 2: Classification error: averaged posterior probability(APP)
print averaged posterior probability(APP) for coping strategy, results suggest low classification error(APP>0.7).
posterior_probs <- cope_LCA3$posterior
assigned_class <- apply(posterior_probs, 1, which.max)
mean_posterior_by_class <- numeric(ncol(posterior_probs))
for (k in 1:ncol(posterior_probs)) {
mean_posterior_by_class[k] <- mean(posterior_probs[assigned_class == k, k])
}
print(mean_posterior_by_class)## [1] 0.8685438 0.7917650 0.9089532
1.2.2.3 Step 3: Final latent class probabilities
Similarly, get the predicted posterior probability of the latent variable for coping strategy.
## get predicted class (gender identity)
cope_class <- cope_LCA3$predclass
cope_predprob <- cope_LCA3$posterior
colnames(cope_predprob) <- paste0("Class",c(1:3))
cope_predprob %>% datatable(
rownames = FALSE,
options = list(
pageLength = 5,
columnDefs = list(list(className = 'dt-center',
targets = 0:2))))1.2.2.4 Step 4: Assign label to each latent class (coping strategies)
probs_list <- cope_LCA3$probs
probs_df <- do.call(rbind, lapply(names(probs_list), function(var) {
prob_df <- as.data.frame(probs_list[[var]])
prob_df$variable <- var
prob_df$class <- rownames(prob_df)
prob_df
})) %>% dplyr::select(-`Pr(1)`)
probs_df_list <- list()
for (var in names(probs_list)) {
prob_df <- as.data.frame(probs_list[[var]])
prob_df$variable <- var
prob_df$class <- rownames(prob_df)
probs_df_list[[var]] <- prob_df
}
probs_df_combined <- bind_rows(probs_df_list)%>%
dplyr::select(-`Pr(1)`) %>%
mutate(variable = dplyr::recode(variable,
"connecting_with_others" = "connect with others",
"contacting_a_healthcare_provider" = "contact healthcare provider",
"drinking_alcohol" = "drink alcohol",
"smoking_more_cigarettes_or_vaping_more" = "smoke/vape more")) %>%
mutate(class = dplyr::recode(class,
"class 3: " = "Class 3: Positive ",
"class 2: " = "Class 2: No action",
"class 1: " = "Class 1: Combined")
) %>%
group_by(variable) %>%
mutate(maxprob = max(`Pr(2)`)) %>%
ungroup() %>%
mutate(maxprob = ifelse(maxprob==`Pr(2)`,maxprob,NA) %>% round(2),
maxprob = ifelse(maxprob==0.16,NA,maxprob))
cope_prob_plot <- ggplot(probs_df_combined, aes(x = variable, y = `Pr(2)`, color = class, group = class)) +
geom_line(size = 1) +
geom_point(size = 1.5)+
geom_text(aes(label = round(maxprob, 2)), vjust = -1, size = 3, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12)) +
labs(x = "", y = "Probability of answering yes ") +
ggtitle("(B) Latent classes for coping strategies")+
scale_color_brewer(palette = "Set1") +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(hjust = 0.5, size = 12, face = "bold"),
axis.title.y = element_text(size = 12),
legend.title = element_blank(),
legend.position = "bottom",
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
)
cope_prob_plot1.3 Missing values
98.6% of the subjects have complete information.
miss_dat <- cope_subset1 %>%
mutate(across(starts_with("recode_race"), ~ ifelse(. == "Prefer not to answer", NA, .))) %>%
mutate(b_dem_sex = ifelse(b_dem_sex %in% c("Prefer not to say", "Other"), NA, b_dem_sex)) %>%
mutate(across(starts_with("recode_orient"), ~ ifelse(. == "I do not want to respond", NA, .)))%>% dplyr::select(b_dem_sex,recode_orient,recode_race)
colnames(miss_dat) <- c("Biological sex","Sexual orientation","Race")
vis_miss(miss_dat)Tabulate the number and percentage of missing. The missing rate is low, so complete case analysis will be used later.
missing_summary <- miss_var_summary(miss_dat)
miss_plot <- ggplot(missing_summary, aes(x = reorder(variable, -n_miss), y = n_miss)) +
geom_bar(stat = "identity", fill = "lightblue",size=0.1) +
geom_text(aes(label = paste0(n_miss, " (", round(pct_miss, 2), "%)")),
hjust = 1, vjust = 0.5, color = "black") +
coord_flip() +
theme_minimal() +
labs( #title = "Missing Data by Variable",
x = "Variable",
y = "Missing Count (Percentage)") +
theme(axis.text.y = element_text(size = 10),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, face = "bold")) +
# coord_cartesian(ylim = c(0, 40))+
theme_minimal()
miss_plot Check if there is any missing pattern among missing variables: No pattern presents!
1.4 The final working dataset
After some investigation, I decided to do a complete case analysis(CCA), the final working dataset is then generated.
dat_cc <- cope_subset3 %>%
filter(recode_race!="Prefer not to answer") %>% ## 1468
filter(b_dem_sex!="Prefer not to say" & b_dem_sex!="Other") %>% ## 1447
filter(orientation!="I do not want to respond") ## 1441cope_subset2 <- cope_subset1 %>%
mutate(across(starts_with("recode_race"), ~ ifelse(. == "Prefer not to answer", NA, .))) %>%
mutate(b_dem_sex = ifelse(b_dem_sex %in% c("Prefer not to say", "Other"), NA, b_dem_sex)) %>%
mutate(across(starts_with("recode_orient"), ~ ifelse(. == "I do not want to respond", NA, .)))
## use the latent variable genderid3 as a surragate for a series of gener identity related quesitons
cope_subset2$genderid3 <- genderid_class
## use the latent variable cope3 as a surragate for a series of coping strategies related quesitons
cope_subset2$cope3 <- cope_class
cope_subset3 <- cope_subset2 %>%
mutate(family_num =challenge_num_dat$challenge_num3,
family_cat = challenge_cat_dat$challenge_cat) %>%
dplyr::select(c("b_response_id", "condition", "b_cdi_mean", "f1_cdi_mean", "recode_race",
#"b_dem_gender_agender", "b_dem_gender_not_sure",
#"b_dem_gender_other_please_specify", "b_dem_gender_androgynous",
#"b_dem_gender_nonbinary", "b_dem_gender_two_spirited", "b_dem_gender_female_to_male_transgender_ftm",
#"b_dem_gender_trans_female_trans_feminine", "b_dem_gender_trans_male_trans_masculine",
#"b_dem_gender_gender_expansive", "b_dem_gender_third_gender",
#"b_dem_gender_genderqueer", "b_dem_gender_male_to_female_transgender_mtf",
# "b_dem_gender_man_boy", "b_dem_gender_transgender", "b_dem_gender_woman_girl",
"b_screener_age", "b_dem_sex", "recode_orient",
#"b_covid_family_family_did_not_enough_enough_money_for_food",
#"b_covid_family_family_did_not_have_a_regular_place_to_sleep_or_stay",
#"b_covid_family_i_could_not_attend_school_in_person", "b_covid_family_i_could_not_attend_school_at_all",
#"b_covid_family_other", "b_covid_family_family_did_not_have_enough_money_for_gas_transportation",
#"b_covid_family_family_did_not_have_enough_money_to_pay_rent",
#"b_covid_family_the_covid_19_pandemic_has_not_affected_me_or_my_family_in_these_ways_in_the_past_2_weeks",
#"b_covid_cope_1_connecting_with_others", "b_covid_cope_1_including_talking_with_people_you_trust_about_your_concerns_and_how_you_are_feeling",
#"b_covid_cope_1_contacting_a_healthcare_provider", "b_covid_cope_1_drinking_alcohol",
#"b_covid_cope_1_smoking_more_cigarettes_or_vaping_more",
"recode_language",
"genderid3","family_num","family_cat",
# "family3",
"cope3")) %>%
mutate(
# condition = factor(condition),
recode_race = factor(recode_race),
# b_screener_age = factor(b_screener_age),
b_dem_sex = factor(b_dem_sex),
recode_orient = factor(recode_orient),
genderid3 = factor(genderid3),
family_num = factor(family_num),
family_cat = factor(family_cat),
cope3 = factor(cope3)
)
dat_cc <- cope_subset3 %>%
filter(!is.na(recode_race)) %>% ## 1468
filter(!is.na(b_dem_sex)) %>% ## 1447
filter(!is.na(recode_orient) )## 1441
dat_cc %>% datatable(
rownames = FALSE,
options = list(
pageLength = 5))table1_dat <- cope_subset3 %>%
mutate(genderid3 = dplyr::recode(genderid3,
"1" = "Non-binary",
"2" = "Women/girls",
"3" = "Male/Masculine")
) %>%
mutate(family_num = dplyr::recode(family_num,
"0" = "0",
"2" = ">=2",
"1" = "1")
) %>%
mutate(cope3 = dplyr::recode(cope3,
"3" = "Positive",
"2" = "No action",
"1" = "Combined")
) %>%
filter(!is.na(recode_race)) %>% ## 1468
filter(!is.na(b_dem_sex)) %>% ## 1447
filter(!is.na(recode_orient) )## 1441
colnames(table1_dat) <- c("id", "condition", "Baseline CDI mean score(0-2)", "3-month CDI mean score",
"Race", "Age (yrs)", "Biological sex", "Sexual orientation",
"Language", "Gender identity", "Number of challenges", "Type of challenges", "Type of coping strategies"
)2 Descriptive table (table 1)
A Descriptive table is generated using the 1441 subjects with complete data. Summary stats is stratified by treatment condition.
table1_cc <- tbl_summary(table1_dat %>% dplyr::select(-id,-`3-month CDI mean score`),
by = condition, statistic = list(all_continuous() ~ "{mean} ({sd})",
all_categorical() ~ "{n} ({p}%)"), digits = all_continuous() ~ 2,
# label = list(
# b_dem_sex ~ "Biological Sex",
# b_dem_orientation ~ "Sexual Orientation"
# #,
# # b_cdi_sum ~ "Baseline CDI Sum Score (0-24)"
# )
) %>%
modify_header(label ~ "**Demographics**") %>%
modify_spanning_header(c("stat_1", "stat_2","stat_3") ~ "**Treatment Received**")
table1_cc| Demographics | Treatment Received | ||
|---|---|---|---|
| Placebo Control N = 4881 |
Project ABC N = 4891 |
Project Personality N = 4641 |
|
| Baseline CDI mean score(0-2) | 1.16 (0.35) | 1.15 (0.34) | 1.17 (0.36) |
| Race | |||
| Asian Including Asian Desi | 50 (10%) | 58 (12%) | 50 (11%) |
| Black/African-American | 33 (6.8%) | 40 (8.2%) | 36 (7.8%) |
| Hispanic/Latinx | 57 (12%) | 61 (12%) | 53 (11%) |
| Mixed | 74 (15%) | 68 (14%) | 63 (14%) |
| White | 274 (56%) | 262 (54%) | 262 (56%) |
| Age (yrs) | |||
| 13 | 28 (5.7%) | 32 (6.5%) | 28 (6.0%) |
| 14 | 77 (16%) | 81 (17%) | 63 (14%) |
| 15 | 150 (31%) | 156 (32%) | 162 (35%) |
| 16 | 233 (48%) | 220 (45%) | 211 (45%) |
| Biological sex | |||
| Female | 434 (89%) | 437 (89%) | 418 (90%) |
| Male | 54 (11%) | 52 (11%) | 46 (9.9%) |
| Sexual orientation | |||
| Heterosexual | 108 (22%) | 97 (20%) | 106 (23%) |
| LGBTQ | 309 (63%) | 327 (67%) | 291 (63%) |
| Other | 71 (15%) | 65 (13%) | 67 (14%) |
| Language | |||
| English | 476 (98%) | 476 (97%) | 450 (97%) |
| Other | 12 (2.5%) | 13 (2.7%) | 14 (3.0%) |
| Gender identity | |||
| Non-binary | 102 (21%) | 98 (20%) | 86 (19%) |
| Women/girls | 313 (64%) | 325 (66%) | 306 (66%) |
| Male/Masculine | 73 (15%) | 66 (13%) | 72 (16%) |
| Number of challenges | |||
| 0 | 98 (20%) | 99 (20%) | 85 (18%) |
| 1 | 276 (57%) | 297 (61%) | 275 (59%) |
| >=2 | 114 (23%) | 93 (19%) | 104 (22%) |
| Type of challenges | |||
| No impact | 98 (20%) | 99 (20%) | 85 (18%) |
| Other | 119 (24%) | 97 (20%) | 124 (27%) |
| School | 271 (56%) | 293 (60%) | 255 (55%) |
| Type of coping strategies | |||
| Combined | 58 (12%) | 61 (12%) | 53 (11%) |
| No action | 228 (47%) | 235 (48%) | 236 (51%) |
| Positive | 202 (41%) | 193 (39%) | 175 (38%) |
| 1 Mean (SD); n (%) | |||
## write tex file
table1_df <- as.data.frame(table1_cc)
latex_table1 <- xtable(table1_df)
writeLines(print(latex_table1, type = "latex",include.rownames = FALSE), paste0("table1_df_",Sys.Date(),".tex"))## % latex table generated in R 4.3.1 by xtable 1.8-4 package
## % Fri Aug 2 16:23:19 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{llll}
## \hline
## **Demographics** & **Placebo Control**
## N = 488 & **Project ABC**
## N = 489 & **Project Personality**
## N = 464 \\
## \hline
## Baseline CDI mean score(0-2) & 1.16 (0.35) & 1.15 (0.34) & 1.17 (0.36) \\
## Race & & & \\
## Asian Including Asian Desi & 50 (10\%) & 58 (12\%) & 50 (11\%) \\
## Black/African-American & 33 (6.8\%) & 40 (8.2\%) & 36 (7.8\%) \\
## Hispanic/Latinx & 57 (12\%) & 61 (12\%) & 53 (11\%) \\
## Mixed & 74 (15\%) & 68 (14\%) & 63 (14\%) \\
## White & 274 (56\%) & 262 (54\%) & 262 (56\%) \\
## Age (yrs) & & & \\
## 13 & 28 (5.7\%) & 32 (6.5\%) & 28 (6.0\%) \\
## 14 & 77 (16\%) & 81 (17\%) & 63 (14\%) \\
## 15 & 150 (31\%) & 156 (32\%) & 162 (35\%) \\
## 16 & 233 (48\%) & 220 (45\%) & 211 (45\%) \\
## Biological sex & & & \\
## Female & 434 (89\%) & 437 (89\%) & 418 (90\%) \\
## Male & 54 (11\%) & 52 (11\%) & 46 (9.9\%) \\
## Sexual orientation & & & \\
## Heterosexual & 108 (22\%) & 97 (20\%) & 106 (23\%) \\
## LGBTQ & 309 (63\%) & 327 (67\%) & 291 (63\%) \\
## Other & 71 (15\%) & 65 (13\%) & 67 (14\%) \\
## Language & & & \\
## English & 476 (98\%) & 476 (97\%) & 450 (97\%) \\
## Other & 12 (2.5\%) & 13 (2.7\%) & 14 (3.0\%) \\
## Gender identity & & & \\
## Non-binary & 102 (21\%) & 98 (20\%) & 86 (19\%) \\
## Women/girls & 313 (64\%) & 325 (66\%) & 306 (66\%) \\
## Male/Masculine & 73 (15\%) & 66 (13\%) & 72 (16\%) \\
## Number of challenges & & & \\
## 0 & 98 (20\%) & 99 (20\%) & 85 (18\%) \\
## 1 & 276 (57\%) & 297 (61\%) & 275 (59\%) \\
## $>$=2 & 114 (23\%) & 93 (19\%) & 104 (22\%) \\
## Type of challenges & & & \\
## No impact & 98 (20\%) & 99 (20\%) & 85 (18\%) \\
## Other & 119 (24\%) & 97 (20\%) & 124 (27\%) \\
## School & 271 (56\%) & 293 (60\%) & 255 (55\%) \\
## Type of coping strategies & & & \\
## Combined & 58 (12\%) & 61 (12\%) & 53 (11\%) \\
## No action & 228 (47\%) & 235 (48\%) & 236 (51\%) \\
## Positive & 202 (41\%) & 193 (39\%) & 175 (38\%) \\
## \hline
## \end{tabular}
## \end{table}
age_summary <- table1_dat %>%
mutate(`Age (yrs)` = as.numeric(`Age (yrs)`)) %>%
group_by(condition) %>%
summarise(
count = n(),
mean_age = mean(`Age (yrs)`, na.rm = TRUE),
sd_age = sd(`Age (yrs)`, na.rm = TRUE),
min_age = min(`Age (yrs)`, na.rm = TRUE),
max_age = max(`Age (yrs)`, na.rm = TRUE),
median_age = median(`Age (yrs)`),
IQR_age = IQR(`Age (yrs)`, na.rm = TRUE)
)3 Baseline CDI Prediction Model
Will use risk-based approach.
The first step is to construct the baseline CDI prediction model. The focus of the “risk” prediction model is on accurately predicting individuals’ “disease” risk. Several considerations need to be taken into account:
Statistical Model Selection:
Consideration of models: will use linear model (simple and interpretability)
Performance Metrics:
For continuous outcomes, models will be evaluated and selected based on root mean squared error (RMSE), calibration slope, and calibration-at-large.
Overfitting:
Employ a leave-one-out cross-validation (LOOCV) framework to address overfitting. LOOCV will be conducted on 80% of the samples (derivation cohort), while the remaining 20% will serve as a test cohort to mimic an external validation. Final model will be constructed on the entire dataset.
## subset data
dat_abc <- dat_cc %>% dplyr::select(-family_num,-recode_language) %>% ## removed because of corr.
filter(condition!="Project Personality") %>%## ABC vs control
mutate(condition = as.character(condition),
condition = as.factor(condition)
)
dat_person <- dat_cc %>% dplyr::select(-family_num,-recode_language) %>%
filter(condition!="Project ABC") %>% ## Personality vs control
mutate(condition = as.character(condition),
condition = as.factor(condition)
)a glimpse of the variables:
## tibble [977 × 11] (S3: tbl_df/tbl/data.frame)
## $ b_response_id : chr [1:977] "R_02uyQw7i0v8yQRX" "R_089oJuXofDsITq9" "R_08lyOxzUAzbJqcV" "R_0lDh6r7xrSRAemR" ...
## $ condition : Factor w/ 2 levels "Placebo Control",..: 1 1 1 2 1 1 1 2 2 2 ...
## $ b_cdi_mean : num [1:977] 1.417 1.167 1.333 1 0.917 ...
## $ f1_cdi_mean : num [1:977] 1 0.583 1.417 0.833 0.583 ...
## $ recode_race : Factor w/ 5 levels "Asian Including Asian Desi",..: 5 5 5 1 5 4 5 5 5 5 ...
## $ b_screener_age: num [1:977] 14 16 15 14 16 14 14 16 16 16 ...
## $ b_dem_sex : Factor w/ 2 levels "Female","Male": 1 1 1 1 1 1 1 1 1 1 ...
## $ recode_orient : Factor w/ 3 levels "Heterosexual",..: 2 2 2 1 1 2 1 1 2 2 ...
## $ genderid3 : Factor w/ 3 levels "1","2","3": 1 2 2 2 2 2 2 2 2 2 ...
## $ family_cat : Factor w/ 3 levels "No impact","Other",..: 2 1 3 3 3 2 3 2 3 2 ...
## $ cope3 : Factor w/ 3 levels "1","2","3": 3 3 2 2 2 2 2 2 2 2 ...
3.1 Build prediction model
Build prediction models using caret
basemodel_dat <- dat_abc %>% dplyr::select(-condition,-b_cdi_mean,-b_response_id,-b_screener_age,)
set.seed(1017)
trainIndex <- createDataPartition(basemodel_dat$f1_cdi_mean, p = .8, list = FALSE, times = 1)
trainData <- basemodel_dat[trainIndex, ]
testData <- basemodel_dat[-trainIndex, ]
train_control <- trainControl(method = "LOOCV")
#train_control <- trainControl(method = "cv", number = 3)
# rf_model <- train(f1_cdi_mean ~ .,
# data = trainData,
# method = "rf",
# trControl = train_control)
tune_grid <- expand.grid(alpha = 0,
lambda = 10^seq(-3, 1, length = 10))
abc_model <- train(f1_cdi_mean ~ .,
data = trainData,
method = "glmnet",
trControl = train_control,
tuneGrid = tune_grid,
metric = "RMSE")
valid_pred <- predict(abc_model, newdata = trainData)
test_pred <- predict(abc_model, newdata = testData)
## mse
rmse_f <- function(actual, pred) {
sqrt(mean((actual - pred) ^ 2))
}
## perf metrics
### rmse
valid_rmse <- rmse_f(trainData$f1_cdi_mean, valid_pred)
test_rmse <- rmse_f(testData$f1_cdi_mean, test_pred)
### calibration slope & at large
valid_cali_m <- lm(trainData$f1_cdi_mean~ valid_pred)
valid_cali_slope <- coef(valid_cali_m)[2]
valid_cali_at_large <- coef(valid_cali_m)[1]
test_cali_m <- lm(testData$f1_cdi_mean~ test_pred)
test_cali_slope <- coef(test_cali_m)[2]
test_cali_at_large <- coef(test_cali_m)[1]
perf_abc <- data.frame(
Metric = c("RMSE", "Calibration Slope", "Calibration at Large"),
Validation = c(valid_rmse, valid_cali_slope, valid_cali_at_large),
Test = c(test_rmse, test_cali_slope, test_cali_at_large)
)basemodel_dat <- dat_person%>% dplyr::select(-condition,-b_cdi_mean,-b_response_id,-b_screener_age,)
set.seed(1017)
trainIndex <- createDataPartition(basemodel_dat$f1_cdi_mean, p = .8, list = FALSE, times = 1)
trainData <- basemodel_dat[trainIndex, ]
testData <- basemodel_dat[-trainIndex, ]
train_control <- trainControl(method = "LOOCV")
#train_control <- trainControl(method = "cv", number = 3)
# rf_model <- train(f1_cdi_mean ~ .,
# data = trainData,
# method = "rf",
# trControl = train_control)
tune_grid <- expand.grid(alpha = 10^seq(0,1, length = 10),
lambda = 10^seq(-3, 1, length = 10))
person_model <- train(f1_cdi_mean ~ .,
data = trainData,
method = "glmnet",
trControl = train_control,
tuneGrid = tune_grid,
metric = "RMSE")
valid_pred <- predict(person_model, newdata = trainData)
test_pred <- predict(person_model, newdata = testData)
## mse
rmse_f <- function(actual, pred) {
sqrt(mean((actual - pred) ^ 2))
}
## perf metrics
### rmse
valid_rmse <- rmse_f(trainData$f1_cdi_mean, valid_pred)
test_rmse <- rmse_f(testData$f1_cdi_mean, test_pred)
### calibration slope & at large
valid_cali_m <- lm(trainData$f1_cdi_mean~ valid_pred)
valid_cali_slope <- coef(valid_cali_m)[2]
valid_cali_at_large <- coef(valid_cali_m)[1]
test_cali_m <- lm(testData$f1_cdi_mean~ test_pred)
test_cali_slope <- coef(test_cali_m)[2]
test_cali_at_large <- coef(test_cali_m)[1]
perf_person <- data.frame(
Metric = c("RMSE", "Calibration Slope", "Calibration at Large"),
Validation = c(valid_rmse, valid_cali_slope, valid_cali_at_large),
Test = c(test_rmse, test_cali_slope, test_cali_at_large)
)Summary of baseline model output of project ABC model:
## Length Class Mode
## a0 100 -none- numeric
## beta 1300 dgCMatrix S4
## df 100 -none- numeric
## dim 2 -none- numeric
## lambda 100 -none- numeric
## dev.ratio 100 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 13 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 1 -none- logical
## param 0 -none- list
Summary of baseline model output of project personality model:
## Length Class Mode
## a0 65 -none- numeric
## beta 845 dgCMatrix S4
## df 65 -none- numeric
## dim 2 -none- numeric
## lambda 65 -none- numeric
## dev.ratio 65 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 13 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 1 -none- logical
## param 0 -none- list
3.2 Evaluate model performance
Summaries the model performance in the following table:
rownames(perf_abc) <- c()
rownames(perf_person) <- c()
kable(perf_abc, caption = "Model performance of baseline risk model (project ABC)")| Metric | Validation | Test |
|---|---|---|
| RMSE | 0.3992314 | 0.3959155 |
| Calibration Slope | 1.1575945 | 0.9232901 |
| Calibration at Large | -0.1543406 | 0.0670571 |
| Metric | Validation | Test |
|---|---|---|
| RMSE | 0.4055562 | 0.4266310 |
| Calibration Slope | 1.0819270 | 0.0040360 |
| Calibration at Large | -0.0809549 | 0.9874272 |
3.3 Final baseline prediciton model
The final model is constructed on the entire dataset
basemodel_dat_abc <- dat_abc %>% dplyr::select(-condition,-b_cdi_mean,-b_response_id,-b_screener_age,)
best_lambda_abc <- abc_model$bestTune$lambda
best_alpha <- abc_model$bestTune$alpha ## fixed 0 (ridge)
covariates_abc <- model.matrix(f1_cdi_mean ~ ., data = basemodel_dat_abc)[, -1] # Create the model matrix
y_abc <- basemodel_dat_abc$f1_cdi_mean
abc_base_m <- glmnet::glmnet(covariates_abc, y_abc, alpha = best_alpha, lambda = best_lambda_abc)
coef(abc_base_m)## 14 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 1.024079643
## recode_raceBlack/African-American -0.029861721
## recode_raceHispanic/Latinx -0.067205445
## recode_raceMixed -0.008337803
## recode_raceWhite 0.041071994
## b_dem_sexMale -0.121873161
## recode_orientLGBTQ 0.073487928
## recode_orientOther 0.102386860
## genderid32 -0.109173072
## genderid33 -0.018497319
## family_catOther 0.060132421
## family_catSchool 0.020780601
## cope32 -0.024043888
## cope33 -0.112895337
abc_base_m <- lm(basemodel_dat_abc$f1_cdi_mean~.,data = basemodel_dat_abc)
# sjPlot::tab_model(abc_base_m)
basemodel_dat_person <- dat_person %>% dplyr::select(-condition,-b_cdi_mean,-b_response_id,-b_screener_age,)
best_lambda_person <- person_model$bestTune$lambda
best_alpha <- abc_model$bestTune$alpha ## fixed 0 (ridge)
covariates_person <- model.matrix(f1_cdi_mean ~ ., data = basemodel_dat_person)[, -1] # Create the model matrix
y_person <- basemodel_dat_person$f1_cdi_mean
person_base_m <- glmnet::glmnet(covariates_person, y_person, alpha = best_alpha, lambda = best_lambda_person)
coef(person_base_m)## 14 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 1.152897654
## recode_raceBlack/African-American -0.054500296
## recode_raceHispanic/Latinx -0.048753462
## recode_raceMixed -0.030923716
## recode_raceWhite -0.002056701
## b_dem_sexMale -0.104694777
## recode_orientLGBTQ 0.068796666
## recode_orientOther 0.064098555
## genderid32 -0.146439033
## genderid33 -0.102595869
## family_catOther 0.020356363
## family_catSchool -0.015311446
## cope32 -0.054863363
## cope33 -0.125546490
person_base_m <- lm(basemodel_dat_person$f1_cdi_mean~.,data = basemodel_dat_person)
# sjPlot::tab_model(person_base_m)3.3.1 Project ABC baseline model
A summary of the baseline prediction model, for project ABC:
| Dependent variable | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 1.06 | 0.92 – 1.21 | <0.001 |
|
recode race [Black/African-American] |
-0.04 | -0.16 – 0.08 | 0.527 |
|
recode race [Hispanic/Latinx] |
-0.08 | -0.19 – 0.02 | 0.129 |
| recode race [Mixed] | -0.03 | -0.13 – 0.08 | 0.617 |
| recode race [White] | 0.03 | -0.05 – 0.12 | 0.462 |
| b dem sex [Male] | -0.15 | -0.25 – -0.04 | 0.007 |
| recode orient [LGBTQ] | 0.09 | 0.02 – 0.16 | 0.010 |
| recode orient [Other] | 0.12 | 0.03 – 0.22 | 0.008 |
| genderid3 [2] | -0.13 | -0.20 – -0.06 | <0.001 |
| genderid3 [3] | -0.02 | -0.12 – 0.08 | 0.687 |
| family cat [Other] | 0.07 | -0.01 – 0.15 | 0.070 |
| family cat [School] | 0.03 | -0.03 – 0.10 | 0.313 |
| cope3 [2] | -0.06 | -0.14 – 0.02 | 0.137 |
| cope3 [3] | -0.16 | -0.24 – -0.08 | <0.001 |
| Observations | 977 | ||
| R2 / R2 adjusted | 0.079 / 0.067 | ||
3.3.2 Project personality baseline model
for project personality:
| Dependent variable | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 1.16 | 1.00 – 1.31 | <0.001 |
|
recode race [Black/African-American] |
-0.06 | -0.18 – 0.07 | 0.387 |
|
recode race [Hispanic/Latinx] |
-0.05 | -0.16 – 0.06 | 0.382 |
| recode race [Mixed] | -0.03 | -0.14 – 0.08 | 0.552 |
| recode race [White] | -0.00 | -0.09 – 0.09 | 0.937 |
| b dem sex [Male] | -0.11 | -0.22 – 0.01 | 0.066 |
| recode orient [LGBTQ] | 0.07 | 0.00 – 0.14 | 0.046 |
| recode orient [Other] | 0.06 | -0.03 – 0.16 | 0.182 |
| genderid3 [2] | -0.15 | -0.22 – -0.08 | <0.001 |
| genderid3 [3] | -0.10 | -0.21 – -0.00 | 0.049 |
| family cat [Other] | 0.02 | -0.06 – 0.10 | 0.625 |
| family cat [School] | -0.02 | -0.09 – 0.05 | 0.664 |
| cope3 [2] | -0.06 | -0.15 – 0.03 | 0.194 |
| cope3 [3] | -0.13 | -0.22 – -0.04 | 0.005 |
| Observations | 952 | ||
| R2 / R2 adjusted | 0.052 / 0.039 | ||
3.4 LRT
LRT was used to test the sig. of variables in the baseline model
For project ABC:
## LRT for each variables
### define model formulas
full_formula <- f1_cdi_mean ~ recode_race + b_dem_sex + recode_orient + genderid3 + family_cat + cope3
gender_formula <- f1_cdi_mean ~ recode_race + b_dem_sex + recode_orient + family_cat + cope3
race_formula <- f1_cdi_mean ~ b_dem_sex + recode_orient + genderid3 + family_cat + cope3
sex_formula <- f1_cdi_mean ~ recode_race + recode_orient + genderid3 + family_cat + cope3
orient_formula <- f1_cdi_mean ~ recode_race + b_dem_sex + genderid3 + family_cat + cope3
family_cat_formula <- f1_cdi_mean ~ recode_race + b_dem_sex + recode_orient + genderid3 + cope3
cope_formula <- f1_cdi_mean ~ recode_race + b_dem_sex + recode_orient + genderid3 + family_cat
full_person <- lm(as.formula(full_formula),data =basemodel_dat_person )
gender_person <- lm(as.formula(gender_formula),data = basemodel_dat_person)
lrt_result <- lmtest::lrtest(gender_person,full_person)
deviance <- lrt_result$Chisq[2]
# Extract the degrees of freedom
df <- lrt_result$Df[2]
# Extract the p-value
p_value <- lrt_result$`Pr(>Chisq)`[2] %>% round(2)
full_formula <- f1_cdi_mean ~ recode_race + b_dem_sex + recode_orient + genderid3 + family_cat + cope3
gender_formula <- f1_cdi_mean ~ recode_race + b_dem_sex + recode_orient + family_cat + cope3
race_formula <- f1_cdi_mean ~ b_dem_sex + recode_orient + genderid3 + family_cat + cope3
sex_formula <- f1_cdi_mean ~ recode_race + recode_orient + genderid3 + family_cat + cope3
orient_formula <- f1_cdi_mean ~ recode_race + b_dem_sex + genderid3 + family_cat + cope3
family_cat_formula <- f1_cdi_mean ~ recode_race + b_dem_sex + recode_orient + genderid3 + cope3
cope_formula <- f1_cdi_mean ~ recode_race + b_dem_sex + recode_orient + genderid3 + family_cat
full_abc <- lm(as.formula(full_formula), data = basemodel_dat_abc)
full_person <- lm(as.formula(full_formula), data = basemodel_dat_person)
lrt_person_f <- function(full_model, reduced_formula) {
reduced_model <- lm(as.formula(reduced_formula), data = basemodel_dat_person)
lrt_result <- lrtest(reduced_model,full_model )
deviance <- lrt_result$Chisq[2]
df <- lrt_result$Df[2]
p_value <- round(lrt_result$`Pr(>Chisq)`[2], 2)
return(data.frame(Deviance = deviance, DF = df, P_value = p_value))
}
lrt_abc_f <- function(full_model, reduced_formula) {
reduced_model <- lm(as.formula(reduced_formula), data = basemodel_dat_abc)
lrt_result <- lrtest(reduced_model,full_model)
deviance <- lrt_result$Chisq[2]
df <- lrt_result$Df[2]
p_value <- round(lrt_result$`Pr(>Chisq)`[2], 2)
return(data.frame(Deviance = deviance, DF = df, P_value = p_value))
}
reduced_models <- list(
`Gender identity` = gender_formula,
`Race` = race_formula,
`Biological sex` = sex_formula,
`Sexual Orientation` = orient_formula,
`Challenges` = family_cat_formula,
`Coping strategies` = cope_formula
)
## lrt for project ABC
dev_results_abc <- lapply(names(reduced_models), function(var) {
result <- lrt_abc_f(full_abc, reduced_models[[var]])
result$Variable <- var
return(result)
})
Dev_df_abc <- bind_rows(dev_results_abc) %>%
dplyr::select(Variable,everything())
kable(Dev_df_abc)| Variable | Deviance | DF | P_value |
|---|---|---|---|
| Gender identity | 15.456260 | 2 | 0.00 |
| Race | 9.522943 | 4 | 0.05 |
| Biological sex | 7.509756 | 1 | 0.01 |
| Sexual Orientation | 8.728432 | 2 | 0.01 |
| Challenges | 3.339847 | 2 | 0.19 |
| Coping strategies | 19.418374 | 2 | 0.00 |
## perform lrt (person)
dev_results_person <- lapply(names(reduced_models), function(var) {
result <- lrt_person_f(full_person, reduced_models[[var]])
result$Variable <- var
return(result)
})
Dev_df_person <- bind_rows(dev_results_person) %>%
dplyr::select(Variable,everything())For project personality
| Variable | Deviance | DF | P_value |
|---|---|---|---|
| Gender identity | 16.358782 | 2 | 0.00 |
| Race | 2.266644 | 4 | 0.69 |
| Biological sex | 3.441522 | 1 | 0.06 |
| Sexual Orientation | 4.084130 | 2 | 0.13 |
| Challenges | 1.220614 | 2 | 0.54 |
| Coping strategies | 10.608348 | 2 | 0.00 |
For Project Personality
4 Investigate HTE
4.1 Main effect only model
first, replicate the model in the original paper for comparison. the main effect model which adjusted for baseline CDI score is specified as:\[E(Y|\bf{X}) = \text{baseline CDI}+ \text{condition}\]
abc_main_dat <- dat_abc %>% dplyr::select(f1_cdi_mean,condition,b_cdi_mean)
abc_main_m <- lm(f1_cdi_mean~ b_cdi_mean + condition,data=abc_main_dat)
person_main_dat <- dat_person %>% dplyr::select(f1_cdi_mean,condition,b_cdi_mean)
person_main_m <- lm(f1_cdi_mean~ b_cdi_mean + condition,data=person_main_dat)print the model, compare with the published paper (results are similar): for project ABC vs control:
##
## Call:
## lm(formula = f1_cdi_mean ~ b_cdi_mean + condition, data = abc_main_dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.18567 -0.21921 0.01075 0.22509 1.04660
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.27471 0.04097 6.705 3.42e-11 ***
## b_cdi_mean 0.64303 0.03263 19.706 < 2e-16 ***
## conditionProject ABC -0.07953 0.02234 -3.560 0.000389 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3492 on 974 degrees of freedom
## Multiple R-squared: 0.2922, Adjusted R-squared: 0.2907
## F-statistic: 201 on 2 and 974 DF, p-value: < 2.2e-16
| f 1 cdi mean | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 0.27 | 0.19 – 0.36 | <0.001 |
| b cdi mean | 0.64 | 0.58 – 0.71 | <0.001 |
| condition [Project ABC] | -0.08 | -0.12 – -0.04 | <0.001 |
| Observations | 977 | ||
| R2 / R2 adjusted | 0.292 / 0.291 | ||
for project personality vs control:
##
## Call:
## lm(formula = f1_cdi_mean ~ b_cdi_mean + condition, data = person_main_dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.17752 -0.22054 0.01222 0.23434 1.03109
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.31125 0.04188 7.432 2.38e-13 ***
## b_cdi_mean 0.61148 0.03329 18.366 < 2e-16 ***
## conditionProject Personality -0.06895 0.02337 -2.950 0.00325 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3604 on 949 degrees of freedom
## Multiple R-squared: 0.2664, Adjusted R-squared: 0.2649
## F-statistic: 172.3 on 2 and 949 DF, p-value: < 2.2e-16
| f 1 cdi mean | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 0.31 | 0.23 – 0.39 | <0.001 |
| b cdi mean | 0.61 | 0.55 – 0.68 | <0.001 |
|
condition [Project Personality] |
-0.07 | -0.11 – -0.02 | 0.003 |
| Observations | 952 | ||
| R2 / R2 adjusted | 0.266 / 0.265 | ||
4.2 HTE model
The HTE is defined as: \[E(Y|\bf{X}) = \text{baseline CDI}+ \text{condition} + \text{lp}+\text{condition}\times \text{lp}\] where lp is the linear predictor of the baseline model.
lp_abc <- predict(abc_base_m, dat_abc)
lp_person <- predict(person_base_m, dat_person)
abc_hte_m <- lm(f1_cdi_mean~ b_cdi_mean + condition*lp_abc,data=dat_abc)
person_hte_m <- lm(f1_cdi_mean~ b_cdi_mean + condition*lp_person,data=dat_person)4.2.1 HTE Project ABC
The hte model for Project ABC vs control:
##
## Call:
## lm(formula = f1_cdi_mean ~ b_cdi_mean + condition * lp_abc, data = dat_abc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.16587 -0.21245 0.01587 0.22319 1.04761
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.20672 0.13296 1.555 0.1203
## b_cdi_mean 0.60042 0.03517 17.070 <2e-16 ***
## conditionProject ABC -0.46326 0.18742 -2.472 0.0136 *
## lp_abc 0.11985 0.14022 0.855 0.3929
## conditionProject ABC:lp_abc 0.39263 0.19023 2.064 0.0393 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3471 on 972 degrees of freedom
## Multiple R-squared: 0.302, Adjusted R-squared: 0.2991
## F-statistic: 105.1 on 4 and 972 DF, p-value: < 2.2e-16
| f 1 cdi mean | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 0.21 | -0.05 – 0.47 | 0.120 |
| b cdi mean | 0.60 | 0.53 – 0.67 | <0.001 |
| condition [Project ABC] | -0.46 | -0.83 – -0.10 | 0.014 |
| lp abc | 0.12 | -0.16 – 0.40 | 0.393 |
|
condition [Project ABC] × lp abc |
0.39 | 0.02 – 0.77 | 0.039 |
| Observations | 977 | ||
| R2 / R2 adjusted | 0.302 / 0.299 | ||
4.2.2 HTE Project personality
The hte model for Project personality vs control:
##
## Call:
## lm(formula = f1_cdi_mean ~ b_cdi_mean + condition * lp_person,
## data = dat_person)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1604 -0.2189 0.0176 0.2321 1.0427
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.18881 0.16684 1.132 0.258
## b_cdi_mean 0.58760 0.03544 16.580 <2e-16 ***
## conditionProject Personality -0.28385 0.24270 -1.170 0.242
## lp_person 0.15184 0.17399 0.873 0.383
## conditionProject Personality:lp_person 0.21744 0.24431 0.890 0.374
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3599 on 947 degrees of freedom
## Multiple R-squared: 0.27, Adjusted R-squared: 0.2669
## F-statistic: 87.56 on 4 and 947 DF, p-value: < 2.2e-16
| f 1 cdi mean | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 0.19 | -0.14 – 0.52 | 0.258 |
| b cdi mean | 0.59 | 0.52 – 0.66 | <0.001 |
|
condition [Project Personality] |
-0.28 | -0.76 – 0.19 | 0.242 |
| lp person | 0.15 | -0.19 – 0.49 | 0.383 |
|
condition [Project Personality] × lp person |
0.22 | -0.26 – 0.70 | 0.374 |
| Observations | 952 | ||
| R2 / R2 adjusted | 0.270 / 0.267 | ||
5 Evaluate HTE
The HTE is evaluated using calibration plot.
5.1 Point estiamtes
First, report the point estiamtes of ATE and cATE.
## compute average treatment effect
#ate_dat <- dat_abc
# ate_dat$condition = 'Placebo Control'
#
# abc_ate_ctrl <- predict(abc_main_m,ate_dat)
#
# ate_dat$condition = 'Project ABC'
#
# abc_ate_trt <- predict(abc_main_m,ate_dat)
#
# abc_ate_trt-abc_ate_ctrl
abc_ate <- coef(abc_main_m)[3]
person_ate <- coef(person_main_m)[3]
## cATE/HTE
## project ABC vs control
hte_dat <- dat_abc
hte_dat$condition = 'Placebo Control'
abc_hte_ctrl <- predict(abc_hte_m,hte_dat)
hte_dat$condition = 'Project ABC'
abc_hte_trt <- predict(abc_hte_m,hte_dat)
abc_hte <- abc_hte_trt-abc_hte_ctrl
## hte personality vs control
hte_dat <- dat_person
hte_dat$condition = 'Placebo Control'
person_hte_ctrl <- predict(person_hte_m,hte_dat)
hte_dat$condition = 'Project Personality'
person_hte_trt <- predict(person_hte_m,hte_dat)
person_hte <- person_hte_trt-person_hte_ctrl
point_df <- data.frame(Comparsion= c("Project ABC vs Control","Project personality vs Control" ),
ATE = c(abc_ate,person_ate),
SE = c(summary(abc_main_m)$coefficients[, "Std. Error"][3],
summary(person_main_m)$coefficients[, "Std. Error"][3])
)
kable(point_df, caption = "summary of avearged treatment effect")| Comparsion | ATE | SE | |
|---|---|---|---|
| conditionProject ABC | Project ABC vs Control | -0.0795347 | 0.0223413 |
| conditionProject Personality | Project personality vs Control | -0.0689513 | 0.0233696 |
## calibration plot
abc_cal_dat <- data.frame(abc_ate,
abc_hte,
lp_abc)
abc_cal_dat$quantile_grp <- cut(abc_cal_dat$lp_abc,
breaks = quantile(abc_cal_dat$lp_abc, probs = seq(0, 1, by = 0.2)),
include.lowest = TRUE,
labels = FALSE)
## compute the averaged hte effect in each quantile risk grp
abc_avg_hte <- abc_cal_dat %>%
group_by(quantile_grp) %>%
summarise(abc_avg_hte = mean(abc_hte, na.rm = TRUE))## replciate for project personality vs control
## calibration plot
person_cal_dat <- data.frame(person_ate,
person_hte,
lp_person)
person_cal_dat$quantile_grp <- cut(person_cal_dat$lp_person,
breaks = quantile(person_cal_dat$lp_person, probs = seq(0, 1, by = 0.2)),
include.lowest = TRUE,
labels = FALSE)
## compute the averaged hte effect in each quantile risk grp
person_avg_hte <- person_cal_dat %>%
group_by(quantile_grp) %>%
summarise(person_avg_hte = mean(person_hte, na.rm = TRUE))
### this is not correct, should boot from the very beginning
## compute bootstrapped CI
# set.seed(1017)
# person_boot_res <- NULL
# for (i in 1:1000){
# boot.idx <- sample(1:dim(person_cal_dat)[1], size = dim(person_cal_dat)[1], replace = T)
# boot.data <- person_cal_dat[boot.idx,]
# boot.data$quantile_grp <- cut(boot.data$lp_person,
# breaks = quantile(boot.data$lp_person, probs = seq(0, 1, by = 0.2)),
# include.lowest = TRUE,
# labels = FALSE)
#
# person_avg_hte <- boot.data %>%
# group_by(quantile_grp) %>%
# summarise(person_avg_hte = mean(person_hte, na.rm = TRUE))
#
# person_boot_res <- rbind(person_boot_res,person_avg_hte)
# }
#
# person_boot_cali_ci <- person_boot_res %>%
# group_by(quantile_grp) %>%
# summarise(
# lower_quantile = round(quantile(person_avg_hte, probs = 0.025), 4),
# upper_quantile = round(quantile(person_avg_hte, probs = 0.975), 4)
# )
#
#
# person_cali_result <- left_join(person_avg_hte,person_boot_cali_ci)A brief summary of the linear predictors:
## Length Class Mode
## 0 NULL NULL
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.7486 0.9316 0.9751 0.9888 1.0464 1.2441
5.2 Bootstrapped CI
compute bootstrapped CI for cATE:
## define bootstrap function
boot_f <- function(mydat=NULL,seed=1017){
set.seed(seed)
abc_boot_res <- NULL
for (i in 1:1000){
boot.idx <- sample(1:dim(mydat)[1], size = dim(mydat)[1], replace = T)
boot.data <- mydat[boot.idx,]
abc_main_dat <- boot.data %>% dplyr::select(f1_cdi_mean,condition,b_cdi_mean)
abc_main_m <- lm(f1_cdi_mean~ b_cdi_mean + condition,data=abc_main_dat)
lp_abc <- predict(abc_main_m,boot.data)
abc_ate <- coef(abc_main_m)[3]
abc_hte_m <- lm(f1_cdi_mean~ b_cdi_mean + condition*lp_abc,data=boot.data)
## cATE/HTE
## project ABC vs control
hte_dat <- boot.data
trt_levels <- unique(boot.data$condition) %>% as.character()
trt <- trt_levels[!(trt_levels %in% 'Placebo Control')]
hte_dat$condition = 'Placebo Control'
abc_hte_ctrl <- predict(abc_hte_m,hte_dat)
hte_dat$condition = trt
abc_hte_trt <- predict(abc_hte_m,hte_dat)
abc_hte <- abc_hte_trt-abc_hte_ctrl
## calibration plot
abc_cal_dat <- data.frame(abc_ate,
abc_hte,
lp_abc)
abc_cal_dat$quantile_grp <- cut(abc_cal_dat$lp_abc,
breaks = quantile(abc_cal_dat$lp_abc, probs = seq(0, 1, by = 0.2)),
include.lowest = TRUE,
labels = FALSE)
## compute the averaged hte effect in each quantile risk grp
avg_hte <- abc_cal_dat %>%
group_by(quantile_grp) %>%
summarise(avg_hte = mean(abc_hte, na.rm = TRUE))
abc_boot_res <- rbind(abc_boot_res,avg_hte)
}
return(abc_boot_res)
}## compute bootstrapped CI
abc_boot_res <- boot_f(mydat = dat_abc)
abc_boot_cali_ci <- abc_boot_res %>%
group_by(quantile_grp) %>%
summarise(
lower_quantile = round(quantile(avg_hte, probs = 0.025), 4),
upper_quantile = round(quantile(avg_hte, probs = 0.975), 4)
)
abc_cali_result <- left_join(abc_avg_hte,abc_boot_cali_ci)for Project personality:
## for personality
person_boot_res <- boot_f(mydat = dat_person)
person_boot_cali_ci <- person_boot_res %>%
group_by(quantile_grp) %>%
summarise(
lower_quantile = round(quantile(avg_hte, probs = 0.025), 4),
upper_quantile = round(quantile(avg_hte, probs = 0.975), 4)
)
person_cali_result <- left_join(person_avg_hte,person_boot_cali_ci)
## identify the bounds for cali plots
max_cali_y <- max(abc_cali_result$upper_quantile,person_cali_result$upper_quantile)
min_cali_y <- min(abc_cali_result$lower_quantile,person_cali_result$lower_quantile)
## check if the ylims are correct
### I used -0.2 and 0.5 range for the y axis in the calibration plots
logic1 <- min_cali_y <= 0.05 & min_cali_y >=-0.2
logic2 <- max_cali_y <= 0.05 & max_cali_y >=-0.2
if(!logic1 & ! logic2){
print(paste0("please make sure the range of y axis in the following calibration plot is [", min_cali_y, ",",max_cali_y,']'))
}else{logic_3 = logic1+logic2}5.3 Calibration plots
The linear predictor entered model as a continuous variable. For presentation purpose, the linear predictor is discretized into five “risk” groups using quantiles (0.2 incremental). The averaged HTEs/cATEs by “risk” group are calculated and compared with the ATE.
abc_cali_plot <- ggplot(abc_cali_result, aes(x = as.factor(quantile_grp), y = abc_avg_hte)) +
geom_point(size = 2.5) +
# geom_line(aes(group = 1), color = "blue") +
geom_errorbar(aes(ymin = lower_quantile, ymax = upper_quantile), width = 0.05, color = "black") +
labs(x = "",
y = "Mean heter_mean_diff"
# title = ""
) +
geom_hline(yintercept = abc_ate, linetype = "dashed", color = "grey")+
theme_minimal()+
scale_y_continuous(
limits = c(-0.2, 0.05),
breaks = seq(-0.2, 0.05, by = 0.05)
)+
# ylim(c(min_cali_y,max_cali_y))+
ylab("Mean difference")+
# ylab(expression(atop("Risk Difference, %", atop("Harm" %<-% phantom(" ") %->% "Benefit")))) + # Custom y-axis label
theme(axis.title.y = element_text(angle = 90, vjust = 0.5, size = 14))For Project ABC vs control:
abc_lp_plot <- ggplot(abc_cal_dat, aes(x = lp_abc)) +
geom_histogram(binwidth = 0.02, fill = "black") +
labs(x = "Predicted CDI mean score using baseline covariates", y = NULL) +
theme_minimal()
abc_combo_plot <- cowplot::plot_grid(
abc_cali_plot,
abc_lp_plot,
ncol = 1,
align = "v",
rel_heights = c(3, 1)
)
# Display the combined plot
abc_combo_plot## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6620 0.8980 0.9743 0.9782 1.0605 1.2931
For Project personality vs control:
person_cali_plot <- ggplot(person_cali_result, aes(x = as.factor(quantile_grp), y = person_avg_hte)) +
geom_point(size = 2.5) +
# geom_line(aes(group = 1), color = "blue") +
geom_errorbar(aes(ymin = lower_quantile, ymax = upper_quantile), width = 0.05, color = "black") +
labs(x = "",
y = "Mean heter_mean_diff"
# title = ""
) +
geom_hline(yintercept = person_ate, linetype = "dashed", color = "grey")+
theme_minimal()+
scale_y_continuous(
limits = c(-0.2, 0.05),
breaks = seq(-0.2, 0.05, by = 0.05)
)+
ylab("Mean difference")+
# ylab(expression(atop("Risk Difference, %", atop("Harm" %<-% phantom(" ") %->% "Benefit")))) + # Custom y-axis label
theme(axis.title.y = element_text(angle = 90, vjust = 0.5, size = 14))person_lp_plot <- ggplot(person_cal_dat, aes(x = lp_person)) +
geom_histogram(binwidth = 0.01, fill = "black") +
labs(x = "Predicted CDI mean score using baseline covariates", y = NULL) +
theme_minimal()
person_combo_plot <- cowplot::plot_grid(
person_cali_plot,
person_lp_plot,
ncol = 1,
align = "v",
rel_heights = c(3, 1)
)
person_combo_plot## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.7486 0.9316 0.9751 0.9888 1.0464 1.2441
combined plots for reporting purpose:
## combined plot for report
abc_lp_plot <- ggplot(abc_cal_dat, aes(x = lp_abc)) +
geom_histogram(binwidth = 0.03, fill = "#1f78b4",alpha=0.8) +
labs(x = "Predicted CDI-SF mean score (Project ABC)", y = NULL) +
theme_minimal()
person_lp_plot <- ggplot(person_cal_dat, aes(x = lp_person)) +
geom_histogram(binwidth = 0.03, fill = "#8B0000",alpha=0.8) +
labs(x = "Predicted CDI-SF mean score (Project Personality)", y = NULL) +
theme_minimal()
cal_plots_report <- ggarrange(abc_lp_plot, person_lp_plot, ncol = 2, nrow = 1)
cal_plots_reportggsave("figures/cal_plots_report.png", width = 10, height = 7, dpi = 600)
abc_cal_dat_report <- abc_cal_dat
person_cal_dat_report <- person_cal_dat
abc_cal_dat_report$grp ="Project ABC"
person_cal_dat_report$grp = "Project Personality"
colnames(person_cal_dat_report) = colnames(abc_cal_dat_report)
combined_cal_dat_report<- rbind(abc_cal_dat_report,person_cal_dat_report)
combined_hist_plot <-ggplot(combined_cal_dat_report, aes(x = lp_abc, fill = grp)) +
geom_histogram(binwidth = 0.02, alpha = 0.8, position = "identity") +
labs(x = "Predicted 3-month CDI-SF mean score using baseline prediction models", y = "Frequency", fill = "Project") +
theme_minimal() +
scale_fill_manual(values = c("Project ABC" = "#1f78b4", "Project Personality" = "#8B0000"))
combined_hist_plot5.3.1 Rearrange for report (For Porject ABC vs control)
Rearrange tables for reporting purpose:
max_cali_y <- 0.065
boot1_f <- function(mydat=NULL,seed=1017){
set.seed(seed)
abc_boot_res <- NULL
for (i in 1:1000){
boot.idx <- sample(1:dim(mydat)[1], size = dim(mydat)[1], replace = T)
boot.data <- mydat[boot.idx,]
abc_main_dat <- boot.data %>% dplyr::select(f1_cdi_mean,condition,b_cdi_mean)
abc_main_m <- lm(f1_cdi_mean~ b_cdi_mean + condition,data=abc_main_dat)
lp_abc <- predict(abc_main_m,boot.data)
abc_ate <- coef(abc_main_m)[3]
abc_hte_m <- lm(f1_cdi_mean~ b_cdi_mean + condition*lp_abc,data=boot.data)
## cATE/HTE
## project ABC vs control
hte_dat <- boot.data
trt_levels <- unique(boot.data$condition) %>% as.character()
trt <- trt_levels[!(trt_levels %in% 'Placebo Control')]
hte_dat$condition = 'Placebo Control'
abc_hte_ctrl <- predict(abc_hte_m,hte_dat)
hte_dat$condition = trt
abc_hte_trt <- predict(abc_hte_m,hte_dat)
abc_hte <- abc_hte_trt-abc_hte_ctrl
## calibration plot
abc_cal_dat <- data.frame(abc_ate,
abc_hte,
lp_abc)
abc_cal_dat$quantile_grp <- cut(abc_cal_dat$lp_abc,
breaks = quantile(abc_cal_dat$lp_abc, probs = seq(0, 1, by = 0.1)),
include.lowest = TRUE,
labels = FALSE)
## compute the averaged hte effect in each quantile risk grp
avg_hte <- abc_cal_dat %>%
group_by(quantile_grp) %>%
summarise(avg_hte = mean(abc_hte, na.rm = TRUE))
abc_boot_res <- rbind(abc_boot_res,avg_hte)
}
return(abc_boot_res)
}## calibration plot
abc_cal_dat_copy <- data.frame(abc_ate,
abc_hte,
lp_abc)
abc_cal_dat_copy$quantile_grp <- cut(abc_cal_dat_copy$lp_abc,
breaks = quantile(abc_cal_dat_copy$lp_abc, probs = seq(0, 1, by = 0.1)),
include.lowest = TRUE,
labels = FALSE)
abc_cal_dat_copy$condition <- dat_abc$condition
cali_10group <- table(abc_cal_dat_copy$condition,abc_cal_dat_copy$quantile_grp) %>% as.data.frame()
wide_10group <- tidyr::pivot_wider(cali_10group, names_from = Var1, values_from = Freq)
## risk quantiles
quantiles <- quantile(abc_cal_dat_copy$lp_abc, probs = seq(0, 1, by = 0.1)) %>% round(2)
base_score_int <- paste(head(quantiles, -1), tail(quantiles, -1), sep = " - ")
wide_10group$base_score <- base_score_int
wide_10group <- wide_10group %>%
dplyr::select(Var2, base_score,everything())
## compute the averaged hte effect in each quantile risk grp
abc_avg_hte_copy <- abc_cal_dat_copy %>%
group_by(quantile_grp) %>%
summarise(abc_avg_hte = mean(abc_hte, na.rm = TRUE))
###boot CI
abc_boot_res_copy <- boot1_f(mydat = dat_abc)
abc_boot_cali_ci_copy <- abc_boot_res_copy %>%
group_by(quantile_grp) %>%
summarise(
lower_quantile = round(quantile(avg_hte, probs = 0.025), 4),
upper_quantile = round(quantile(avg_hte, probs = 0.975), 4)
)
abc_cali_result_copy <- left_join(abc_avg_hte_copy,abc_boot_cali_ci_copy)## Joining with `by = join_by(quantile_grp)`
abc_cali_result_copy <- apply(abc_cali_result_copy, 2, function(x) round(x, 2)) %>% data.frame()
abc_cali_result_copy$mean_diff <- paste0( abc_cali_result_copy$abc_avg_hte,"(",abc_cali_result_copy$lower_quantile,",",abc_cali_result_copy$upper_quantile,")")### calibration plot data prep
cali10_plot_dat <- cbind(wide_10group,abc_cali_result_copy)
cali10_plot <- ggplot(cali10_plot_dat, aes(x = as.factor(quantile_grp), y = abc_avg_hte)) +
geom_point(size = 5) +
# geom_line(aes(group = 1), color = "blue") +
geom_errorbar(aes(ymin = lower_quantile, ymax = upper_quantile),width = 0.3,linewidth = 1, color = "black") +
labs(x = "",
y = "Mean heter_mean_diff"
# title = ""
) +
geom_hline(yintercept = abc_ate, linetype = "dashed", color = "grey")+
geom_text(aes(x = 10, y = round(abc_ate,3), label = paste(" ATE=", round(abc_ate,3))),
vjust = -1, hjust = 0.5, color = "black")+
theme_minimal()+
scale_y_continuous(
limits = c(-0.2, 0.1),
breaks = seq(-0.2, 0.05, by = 0.05)
)+
ylab("Mean difference")+
coord_flip()+
# ylab(expression(atop("Risk Difference, %", atop("Harm" %<-% phantom(" ") %->% "Benefit")))) + # Custom y-axis label
theme(axis.title.y = element_text(angle = 90, vjust = 0.5, size = 14))## for reporting purpose
wide_10group$`Mean difference` <- abc_cali_result_copy$mean_diff
colnames(wide_10group) <- c("Score group", "Score interval", "Placebo Control", "Project ABC", "Mean difference"
)
wide_10group_plot <- wide_10group
wide_10group_plot$`Score group` <- as.character(wide_10group_plot$`Score group`)
wide_10group_plot$`Score group`[wide_10group_plot$`Score group` == "1"] <- "1 (Lowest)"
wide_10group_plot$`Score group`[wide_10group_plot$`Score group` == "10"] <- "10 (Highest)"The calibration table and plot:
cali10_plot_dat$quantile_grp = factor(cali10_plot_dat$quantile_grp,levels =c("1","2","3","4","5","6","7","8","9","10") )
extended_levels <-c(rev(levels(cali10_plot_dat$quantile_grp)),11)
new_labels <- c(as.character(c(10:1)),"")
cali10_plot <- ggplot(cali10_plot_dat, aes(x = as.factor(quantile_grp), y = abc_avg_hte)) +
geom_point(size = 5,shape=15,color = "#1f78b4") +
geom_errorbar(aes(ymin = lower_quantile, ymax = upper_quantile), width = 0.3,linewidth = 1,color = "#1f78b4") +
labs(x = "", y = "Mean difference of 3-month CDI-SF mean score(Project ABC vs Placebo Control)") +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
geom_hline(yintercept = abc_ate, linetype = "dashed",linewidth = 0.7,color = "#1f78b4") +
geom_text(aes(x = 10, y = round(abc_ate,2), label = paste(" ATE=", round(abc_ate,2))),
vjust = -1, hjust = 0.5, color = "#1f78b4")+
theme_minimal() +
scale_y_continuous(limits = c(-0.2, max_cali_y), breaks = seq(-0.2, 0.1, by = 0.05)) +
coord_flip() +
#scale_x_discrete(limits = rev(levels(cali10_plot_dat$quantile_grp)))+
scale_x_discrete(limits = extended_levels,labels = new_labels) +
theme(axis.title.y = element_text(angle = 90, vjust = 0.5, size = 12))+
theme(axis.text.x = element_text(size = 12))
cali10_plot#### table plot
wide10plot <- wide_10group_plot %>% mutate(`Placebo Control` = as.character(`Placebo Control`),
`Project ABC` = as.character(`Project ABC`),
`Num.~Control/ABC^1`= paste0(`Placebo Control`,"/",`Project ABC`)
#`Score~interval^1` = `Score interval`
)
wide10plot <- rbind(colnames(wide10plot),wide10plot)
wide10plot1 <- wide10plot
wide10plot1$`Score group` <- factor(wide10plot$`Score group`,
levels = c("Score group", "1 (Lowest)", "2", "3", "4", "5", "6", "7",
"8", "9", "10 (Highest)"))
wide10plot1$`Mean difference`[wide10plot1$`Mean difference`=="-0.07(-0.12,0)"] = "-0.07(-0.12,0.00)"
wide10plot1$`Mean difference`[wide10plot1$`Mean difference`=="-0.1(-0.16,-0.04)"] = "-0.10(-0.16,-0.04)"
wide10plot1$`Mean difference`[wide10plot1$`Mean difference`=="-0.11(-0.2,-0.04)"] = "-0.11(-0.20,-0.04)"
wide10plot1$`Mean difference`[wide10plot1$`Mean difference`=="-0.05(-0.12,0)"] = "-0.05(-0.12,0.00)"
wide10plot1$`Mean difference`[wide10plot1$`Mean difference`=="0(-0.14,0.06)"] = "0.00(-0.14,0.06)"
wide10plot1$`Mean difference`[wide10plot1$`Mean difference`=="-0.16(-0.2,-0.04)"] = "-0.16(-0.20,-0.04)"
# wide10plot1$`Score group`[wide10plot1$`Score group`=="Score group"] = "Score~group^1"
# wide10plot1$`Score interval`[wide10plot1$`Score interval`=="Score interval"] = "Score~interval^2"
# wide10plot1$`Mean difference`[wide10plot1$`Mean difference`=="Mean difference"] = "Mean~difference^4"
wide10plot1$myscale <- c(1.2,2.1,3.1,4.1,5,6,7,8,9,10,11)
wide10plot1$myscale <- c(1.1, c(2:11)+0.05)
table_text <- ggplot(wide10plot1, aes(x = 1, y = myscale)) +
geom_text(aes(label = `Score group`), size = 5, hjust = 0,nudge_x = 0) +
geom_text(aes(label =`Score interval` ), size = 5, hjust = 0.5, vjust = 0.5, nudge_x = 1.2) +
#geom_text(aes(label = `Placebo Control`), size = 5, hjust = 0, nudge_x = 1.8) +
#geom_text(aes(label = `Project ABC`), size = 5, hjust = 0, nudge_x = 3) +
geom_text(aes(label = `Num.~Control/ABC^1`), size = 5, hjust = 0.5, vjust = 0.5, nudge_x = 2.4,parse = TRUE)+
geom_text(aes(label = `Mean difference`), size = 5, hjust = 0.5, vjust = 0.5, nudge_x = 3.75) +
theme_void() +
xlim(1, 5) +
ylim(12, 1)
risk_strata_plot <- plot_grid(table_text, cali10_plot, ncol = 2, rel_widths = c(2, 3))
table_text5.3.2 Rearrange for report (For Porject personality vs control)
## calibration plot
person_cal_dat_copy <- data.frame(person_ate,
person_hte,
lp_person)
person_cal_dat_copy$quantile_grp <- cut(person_cal_dat_copy$lp_person,
breaks = quantile(person_cal_dat_copy$lp_person, probs = seq(0, 1, by = 0.1)),
include.lowest = TRUE,
labels = FALSE)
person_cal_dat_copy$condition <- dat_person$condition
cali_10group <- table(person_cal_dat_copy$condition,person_cal_dat_copy$quantile_grp) %>% as.data.frame()
wide_10group <- tidyr::pivot_wider(cali_10group, names_from = Var1, values_from = Freq)
## risk quantiles
quantiles <- quantile(person_cal_dat_copy$lp_person, probs = seq(0, 1, by = 0.1)) %>% round(2)
base_score_int <- paste(head(quantiles, -1), tail(quantiles, -1), sep = " - ")
wide_10group$base_score <- base_score_int
wide_10group <- wide_10group %>%
dplyr::select(Var2, base_score,everything())
## compute the averaged hte effect in each quantile risk grp
person_avg_hte_copy <- person_cal_dat_copy %>%
group_by(quantile_grp) %>%
summarise(person_avg_hte = mean(person_hte, na.rm = TRUE))
###boot CI
person_boot_res_copy <- boot1_f(mydat = dat_person)
person_boot_cali_ci_copy <- person_boot_res_copy %>%
group_by(quantile_grp) %>%
summarise(
lower_quantile = round(quantile(avg_hte, probs = 0.025), 4),
upper_quantile = round(quantile(avg_hte, probs = 0.975), 4)
)
person_cali_result_copy <- left_join(person_avg_hte_copy,person_boot_cali_ci_copy)## Joining with `by = join_by(quantile_grp)`
person_cali_result_copy <- apply(person_cali_result_copy, 2, function(x) round(x, 2)) %>% data.frame()
person_cali_result_copy$mean_diff <- paste0( person_cali_result_copy$person_avg_hte,"(",person_cali_result_copy$lower_quantile,",",person_cali_result_copy$upper_quantile,")")### calibration plot data prep
cali10_plot_dat <- cbind(wide_10group,person_cali_result_copy)
cali10_plot <- ggplot(cali10_plot_dat, aes(x = as.factor(quantile_grp), y = person_avg_hte)) +
geom_point(size = 5) +
# geom_line(aes(group = 1), color = "blue") +
geom_errorbar(aes(ymin = lower_quantile, ymax = upper_quantile), width = 0.3,linewidth = 1, color = "black") +
labs(x = "",
y = "Mean heter_mean_diff"
# title = ""
) +
geom_hline(yintercept = person_ate, linetype = "dashed",linewidth = 0.7, color = "grey")+
theme_minimal()+
scale_y_continuous(
limits = c(-0.2, max_cali_y),
breaks = seq(-0.2, 0.05, by = 0.05)
)+
ylab("Mean difference")+
coord_flip()+
# ylab(expression(atop("Risk Difference, %", atop("Harm" %<-% phantom(" ") %->% "Benefit")))) + # Custom y-axis label
theme(axis.title.y = element_text(angle = 90, vjust = 0.5, size = 14))## for reporting purpose
wide_10group$`Mean difference` <- person_cali_result_copy$mean_diff
colnames(wide_10group) <- c("Score group", "Score interval", "Placebo Control", "Project Personality", "Mean difference"
)
wide_10group_plot <- wide_10group
wide_10group_plot$`Score group` <- as.character(wide_10group_plot$`Score group`)
wide_10group_plot$`Score group`[wide_10group_plot$`Score group` == "1"] <- "1 (Lowest)"
wide_10group_plot$`Score group`[wide_10group_plot$`Score group` == "10"] <- "10 (Highest)"The calibration table and plot:
cali10_plot_dat$quantile_grp = factor(cali10_plot_dat$quantile_grp,levels =c("1","2","3","4","5","6","7","8","9","10") )
extended_levels <-c(rev(levels(cali10_plot_dat$quantile_grp)),11)
new_labels <- c(as.character(c(10:1)),"")
cali10_plot <- ggplot(cali10_plot_dat, aes(x = as.factor(quantile_grp), y = person_avg_hte)) +
geom_point(size = 5,shape=15,color = "#8B0000") +
geom_errorbar(aes(ymin = lower_quantile, ymax = upper_quantile),width = 0.3,linewidth = 1,color = "#8B0000") +
labs(x = "", y = "Mean difference of 3-month CDI-SF mean score(Project Personality vs Placebo Control)") +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
geom_hline(yintercept = person_ate, linetype = "dashed",color = "#8B0000") +
geom_text(aes(x = 10, y = round(person_ate,3), label = paste(" ATE=", round(person_ate,3))),
vjust = -1, hjust = 0.5, color = "#8B0000")+
theme_minimal() +
scale_y_continuous(limits = c(-0.2, max_cali_y), breaks = seq(-0.2, max_cali_y, by = 0.05)) +
coord_flip() +
#scale_x_discrete(limits = rev(levels(cali10_plot_dat$quantile_grp)))+
scale_x_discrete(limits = extended_levels,labels = new_labels) + # 反转x轴顺序
theme(axis.title.y = element_text(angle = 90, vjust = 0.5, size = 12))+
theme(axis.text.x = element_text(size = 12))
cali10_plot#### table plot
wide10plot <- wide_10group_plot %>% mutate(`Placebo Control` = as.character(`Placebo Control`),
`Project Personality` = as.character(`Project Personality`),
`Num.~Control/Personality^2`= paste0(`Placebo Control`,"/",`Project Personality`)
#`Score~interval^1` = `Score interval`
)
wide10plot <- rbind(colnames(wide10plot),wide10plot)
wide10plot1 <- wide10plot
wide10plot1$`Score group` <- factor(wide10plot$`Score group`,
levels = c("Score group", "1 (Lowest)", "2", "3", "4", "5", "6", "7",
"8", "9", "10 (Highest)"))
wide10plot1$`Mean difference`[wide10plot1$`Mean difference`=="-0.07(-0.12,0)"] = "-0.07(-0.12,0.00)"
wide10plot1$`Mean difference`[wide10plot1$`Mean difference`=="-0.1(-0.16,-0.04)"] = "-0.10(-0.16,-0.04)"
wide10plot1$`Score interval`[wide10plot1$`Score interval`=="0.98 - 1"] = "0.98 - 1.00"
wide10plot1$`Score interval`[wide10plot1$`Score interval`=="0.87 - 0.9"] = "0.87 - 0.90"
wide10plot1$`Score interval`[wide10plot1$`Score interval`=="1 - 1.03"] = "1.00 - 1.03"
wide10plot1$`Mean difference`[wide10plot1$`Mean difference`=="-0.06(-0.14,0)"] = "-0.06(-0.14,0.00)"
wide10plot1$`Mean difference`[wide10plot1$`Mean difference`=="-0.1(-0.15,0.01)"] = "-0.10(-0.15,0.01)"
# wide10plot1$`Score group`[wide10plot1$`Score group`=="Score group"] = "Score~group^1"
# wide10plot1$`Score interval`[wide10plot1$`Score interval`=="Score interval"] = "Score~interval^2"
# wide10plot1$`Mean difference`[wide10plot1$`Mean difference`=="Mean difference"] = "Mean~difference^4"
wide10plot1$myscale <- c(1.2,2.1,3.1,4.1,5,6,7,8,9,10,11)
wide10plot1$myscale <- c(1.1, c(2:11)+0.05)
table_text <- ggplot(wide10plot1, aes(x = 1, y = myscale)) +
geom_text(aes(label = `Score group`), size = 5, hjust = 0,nudge_x = 0) +
geom_text(aes(label =`Score interval` ), size = 5, hjust = 0.5, vjust = 0.5, nudge_x = 1.2) +
#geom_text(aes(label = `Placebo Control`), size = 5, hjust = 0, nudge_x = 1.8) +
#geom_text(aes(label = `Project Personality`), size = 5, hjust = 0, nudge_x = 3) +
geom_text(aes(label = `Num.~Control/Personality^2`), size = 5, hjust = 0.5, vjust = 0.5, nudge_x = 2.4,parse = TRUE)+
geom_text(aes(label = `Mean difference`), size = 5, hjust = 0.5, vjust = 0.5, nudge_x = 3.75) +
theme_void() +
xlim(1, 5) +
ylim(12, 1)
risk_strata_plot_person <-plot_grid(table_text, cali10_plot, ncol = 2, rel_widths = c(2, 3))
# ggsave("figures/person_risk_strata.png", risk_strata_plot_person, width = 12, height = 6, dpi = 600)
risk_plots_combined <- ggarrange(risk_strata_plot, risk_strata_plot_person, ncol = 1, nrow = 2)
table_text# # risk_plots_combined
# ggsave("figures/risk_plots_combined.png", width = 12, height = 6, dpi = 600)
combined_plot <- arrangeGrob(risk_strata_plot, risk_strata_plot_person, ncol = 1, nrow = 2)
# Save the combined plot to a file with specified dimensions
ggsave("figures/combined_risk_plot.png", combined_plot, width = 18.2, height = 12)5.4 Distribution in high risk groups
risk_abc_dat <- cbind(table1_dat %>% filter(condition %in% c("Project ABC", "Placebo Control")),abc_cal_dat_copy %>%
dplyr::select(-condition))
risk_table1_abc <-tbl_summary(risk_abc_dat %>% dplyr::select(-id,-`3-month CDI mean score`),
by = quantile_grp, statistic = list(all_continuous() ~ "{mean} ({sd})",
all_categorical() ~ "{n} ({p}%)"), digits = all_continuous() ~ 2,
# label = list(
# b_dem_sex ~ "Biological Sex",
# b_dem_orientation ~ "Sexual Orientation"
# #,
# # b_cdi_sum ~ "Baseline CDI Sum Score (0-24)"
# )
) %>%
modify_header(label ~ "**Demographics**")
risk_table1_abc_df <- as.data.frame(risk_table1_abc)
latex_table1_abc <- xtable(risk_table1_abc_df)
writeLines(print(latex_table1_abc, type = "latex",include.rownames = FALSE), paste0("table1_df_abc_",Sys.Date(),".tex"))## % latex table generated in R 4.3.1 by xtable 1.8-4 package
## % Fri Aug 2 16:25:17 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{lllllllllll}
## \hline
## **Demographics** & **1**
## N = 98 & **10**
## N = 94 & **2**
## N = 104 & **3**
## N = 92 & **4**
## N = 97 & **5**
## N = 101 & **6**
## N = 95 & **7**
## N = 103 & **8**
## N = 91 & **9**
## N = 102 \\
## \hline
## condition & & & & & & & & & & \\
## Placebo Control & 44 (45\%) & 42 (45\%) & 57 (55\%) & 52 (57\%) & 51 (53\%) & 44 (44\%) & 41 (43\%) & 51 (50\%) & 45 (49\%) & 61 (60\%) \\
## Project ABC & 54 (55\%) & 52 (55\%) & 47 (45\%) & 40 (43\%) & 46 (47\%) & 57 (56\%) & 54 (57\%) & 52 (50\%) & 46 (51\%) & 41 (40\%) \\
## Baseline CDI mean score(0-2) & 0.92 (0.31) & 1.37 (0.30) & 1.04 (0.37) & 1.06 (0.33) & 1.05 (0.29) & 1.14 (0.34) & 1.23 (0.31) & 1.21 (0.30) & 1.26 (0.32) & 1.29 (0.30) \\
## Race & & & & & & & & & & \\
## Asian Including Asian Desi & 18 (18\%) & 1 (1.1\%) & 13 (13\%) & 35 (38\%) & 3 (3.1\%) & 9 (8.9\%) & 10 (11\%) & 9 (8.7\%) & 5 (5.5\%) & 5 (4.9\%) \\
## Black/African-American & 16 (16\%) & 4 (4.3\%) & 16 (15\%) & 5 (5.4\%) & 5 (5.2\%) & 11 (11\%) & 8 (8.4\%) & 3 (2.9\%) & 3 (3.3\%) & 2 (2.0\%) \\
## Hispanic/Latinx & 38 (39\%) & 1 (1.1\%) & 27 (26\%) & 1 (1.1\%) & 16 (16\%) & 20 (20\%) & 2 (2.1\%) & 3 (2.9\%) & 5 (5.5\%) & 5 (4.9\%) \\
## Mixed & 10 (10\%) & 14 (15\%) & 26 (25\%) & 1 (1.1\%) & 13 (13\%) & 17 (17\%) & 21 (22\%) & 11 (11\%) & 12 (13\%) & 17 (17\%) \\
## White & 16 (16\%) & 74 (79\%) & 22 (21\%) & 50 (54\%) & 60 (62\%) & 44 (44\%) & 54 (57\%) & 77 (75\%) & 66 (73\%) & 73 (72\%) \\
## Age (yrs) & & & & & & & & & & \\
## 13 & 3 (3.1\%) & 10 (11\%) & 4 (3.8\%) & 1 (1.1\%) & 5 (5.2\%) & 6 (5.9\%) & 7 (7.4\%) & 8 (7.8\%) & 9 (9.9\%) & 7 (6.9\%) \\
## 14 & 12 (12\%) & 16 (17\%) & 13 (13\%) & 19 (21\%) & 11 (11\%) & 21 (21\%) & 15 (16\%) & 12 (12\%) & 20 (22\%) & 19 (19\%) \\
## 15 & 32 (33\%) & 30 (32\%) & 30 (29\%) & 29 (32\%) & 27 (28\%) & 28 (28\%) & 27 (28\%) & 39 (38\%) & 27 (30\%) & 37 (36\%) \\
## 16 & 51 (52\%) & 38 (40\%) & 57 (55\%) & 43 (47\%) & 54 (56\%) & 46 (46\%) & 46 (48\%) & 44 (43\%) & 35 (38\%) & 39 (38\%) \\
## Biological sex & & & & & & & & & & \\
## Female & 66 (67\%) & 94 (100\%) & 83 (80\%) & 80 (87\%) & 87 (90\%) & 92 (91\%) & 80 (84\%) & 101 (98\%) & 87 (96\%) & 101 (99\%) \\
## Male & 32 (33\%) & 0 (0\%) & 21 (20\%) & 12 (13\%) & 10 (10\%) & 9 (8.9\%) & 15 (16\%) & 2 (1.9\%) & 4 (4.4\%) & 1 (1.0\%) \\
## Sexual orientation & & & & & & & & & & \\
## Heterosexual & 77 (79\%) & 0 (0\%) & 45 (43\%) & 39 (42\%) & 5 (5.2\%) & 20 (20\%) & 16 (17\%) & 0 (0\%) & 3 (3.3\%) & 0 (0\%) \\
## LGBTQ & 20 (20\%) & 64 (68\%) & 57 (55\%) & 48 (52\%) & 83 (86\%) & 68 (67\%) & 75 (79\%) & 90 (87\%) & 54 (59\%) & 77 (75\%) \\
## Other & 1 (1.0\%) & 30 (32\%) & 2 (1.9\%) & 5 (5.4\%) & 9 (9.3\%) & 13 (13\%) & 4 (4.2\%) & 13 (13\%) & 34 (37\%) & 25 (25\%) \\
## Language & & & & & & & & & & \\
## English & 91 (93\%) & 93 (99\%) & 100 (96\%) & 88 (96\%) & 94 (97\%) & 99 (98\%) & 94 (99\%) & 102 (99\%) & 91 (100\%) & 100 (98\%) \\
## Other & 7 (7.1\%) & 1 (1.1\%) & 4 (3.8\%) & 4 (4.3\%) & 3 (3.1\%) & 2 (2.0\%) & 1 (1.1\%) & 1 (1.0\%) & 0 (0\%) & 2 (2.0\%) \\
## Gender identity & & & & & & & & & & \\
## Non-binary & 1 (1.0\%) & 79 (84\%) & 2 (1.9\%) & 1 (1.1\%) & 5 (5.2\%) & 5 (5.0\%) & 20 (21\%) & 10 (9.7\%) & 27 (30\%) & 50 (49\%) \\
## Women/girls & 70 (71\%) & 0 (0\%) & 83 (80\%) & 80 (87\%) & 86 (89\%) & 88 (87\%) & 61 (64\%) & 87 (84\%) & 48 (53\%) & 35 (34\%) \\
## Male/Masculine & 27 (28\%) & 15 (16\%) & 19 (18\%) & 11 (12\%) & 6 (6.2\%) & 8 (7.9\%) & 14 (15\%) & 6 (5.8\%) & 16 (18\%) & 17 (17\%) \\
## Number of challenges & & & & & & & & & & \\
## 0 & 26 (27\%) & 8 (8.5\%) & 31 (30\%) & 37 (40\%) & 9 (9.3\%) & 9 (8.9\%) & 30 (32\%) & 16 (16\%) & 17 (19\%) & 14 (14\%) \\
## 1 & 58 (59\%) & 44 (47\%) & 65 (63\%) & 46 (50\%) & 77 (79\%) & 58 (57\%) & 44 (46\%) & 74 (72\%) & 47 (52\%) & 60 (59\%) \\
## $>$=2 & 14 (14\%) & 42 (45\%) & 8 (7.7\%) & 9 (9.8\%) & 11 (11\%) & 34 (34\%) & 21 (22\%) & 13 (13\%) & 27 (30\%) & 28 (27\%) \\
## Type of challenges & & & & & & & & & & \\
## No impact & 26 (27\%) & 8 (8.5\%) & 31 (30\%) & 37 (40\%) & 9 (9.3\%) & 9 (8.9\%) & 30 (32\%) & 16 (16\%) & 17 (19\%) & 14 (14\%) \\
## Other & 11 (11\%) & 45 (48\%) & 10 (9.6\%) & 12 (13\%) & 9 (9.3\%) & 33 (33\%) & 27 (28\%) & 6 (5.8\%) & 25 (27\%) & 38 (37\%) \\
## School & 61 (62\%) & 41 (44\%) & 63 (61\%) & 43 (47\%) & 79 (81\%) & 59 (58\%) & 38 (40\%) & 81 (79\%) & 49 (54\%) & 50 (49\%) \\
## Type of coping strategies & & & & & & & & & & \\
## Combined & 0 (0\%) & 35 (37\%) & 0 (0\%) & 0 (0\%) & 2 (2.1\%) & 7 (6.9\%) & 9 (9.5\%) & 13 (13\%) & 17 (19\%) & 36 (35\%) \\
## No action & 11 (11\%) & 59 (63\%) & 39 (38\%) & 33 (36\%) & 25 (26\%) & 62 (61\%) & 67 (71\%) & 78 (76\%) & 47 (52\%) & 42 (41\%) \\
## Positive & 87 (89\%) & 0 (0\%) & 65 (63\%) & 59 (64\%) & 70 (72\%) & 32 (32\%) & 19 (20\%) & 12 (12\%) & 27 (30\%) & 24 (24\%) \\
## abc\_ate & & & & & & & & & & \\
## -0.0795346966023499 & 98 (100\%) & 94 (100\%) & 104 (100\%) & 92 (100\%) & 97 (100\%) & 101 (100\%) & 95 (100\%) & 103 (100\%) & 91 (100\%) & 102 (100\%) \\
## abc\_hte & -0.16 (0.01) & 0.00 (0.02) & -0.13 (0.01) & -0.11 (0.00) & -0.10 (0.00) & -0.09 (0.00) & -0.07 (0.00) & -0.06 (0.00) & -0.05 (0.00) & -0.03 (0.01) \\
## lp\_abc & 0.78 (0.03) & 1.19 (0.04) & 0.86 (0.02) & 0.90 (0.01) & 0.92 (0.01) & 0.96 (0.01) & 1.00 (0.01) & 1.03 (0.00) & 1.06 (0.01) & 1.11 (0.02) \\
## \hline
## \end{tabular}
## \end{table}
risk_person_dat <- cbind(table1_dat %>% filter(condition %in% c("Project Personality", "Placebo Control")),person_cal_dat_copy %>% dplyr::select(-condition))
risk_table1_person <- tbl_summary(risk_person_dat %>% dplyr::select(-id,-`3-month CDI mean score`),
by = quantile_grp, statistic = list(all_continuous() ~ "{mean} ({sd})",
all_categorical() ~ "{n} ({p}%)"), digits = all_continuous() ~ 2,
# label = list(
# b_dem_sex ~ "Biological Sex",
# b_dem_orientation ~ "Sexual Orientation"
# #,
# # b_cdi_sum ~ "Baseline CDI Sum Score (0-24)"
# )
) %>%
modify_header(label ~ "**Demographics**")
risk_table1_person_df <- as.data.frame(risk_table1_person)
latex_table1_person <- xtable(risk_table1_person_df)
writeLines(print(latex_table1_person, type = "latex",include.rownames = FALSE), paste0("table1_df_person_",Sys.Date(),".tex"))## % latex table generated in R 4.3.1 by xtable 1.8-4 package
## % Fri Aug 2 16:25:19 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{lllllllllll}
## \hline
## **Demographics** & **1**
## N = 101 & **10**
## N = 96 & **2**
## N = 94 & **3**
## N = 103 & **4**
## N = 83 & **5**
## N = 95 & **6**
## N = 120 & **7**
## N = 72 & **8**
## N = 94 & **9**
## N = 94 \\
## \hline
## condition & & & & & & & & & & \\
## Placebo Control & 56 (55\%) & 46 (48\%) & 53 (56\%) & 46 (45\%) & 44 (53\%) & 47 (49\%) & 63 (53\%) & 33 (46\%) & 46 (49\%) & 54 (57\%) \\
## Project Personality & 45 (45\%) & 50 (52\%) & 41 (44\%) & 57 (55\%) & 39 (47\%) & 48 (51\%) & 57 (48\%) & 39 (54\%) & 48 (51\%) & 40 (43\%) \\
## Baseline CDI mean score(0-2) & 0.93 (0.32) & 1.35 (0.30) & 1.02 (0.39) & 1.06 (0.33) & 1.12 (0.37) & 1.19 (0.31) & 1.19 (0.28) & 1.25 (0.30) & 1.24 (0.36) & 1.29 (0.33) \\
## Race & & & & & & & & & & \\
## Asian Including Asian Desi & 11 (11\%) & 6 (6.3\%) & 9 (9.6\%) & 3 (2.9\%) & 26 (31\%) & 15 (16\%) & 5 (4.2\%) & 15 (21\%) & 2 (2.1\%) & 8 (8.5\%) \\
## Black/African-American & 14 (14\%) & 3 (3.1\%) & 12 (13\%) & 12 (12\%) & 8 (9.6\%) & 5 (5.3\%) & 4 (3.3\%) & 3 (4.2\%) & 2 (2.1\%) & 6 (6.4\%) \\
## Hispanic/Latinx & 20 (20\%) & 3 (3.1\%) & 26 (28\%) & 8 (7.8\%) & 0 (0\%) & 24 (25\%) & 13 (11\%) & 5 (6.9\%) & 3 (3.2\%) & 8 (8.5\%) \\
## Mixed & 9 (8.9\%) & 14 (15\%) & 23 (24\%) & 10 (9.7\%) & 9 (11\%) & 12 (13\%) & 9 (7.5\%) & 19 (26\%) & 17 (18\%) & 15 (16\%) \\
## White & 47 (47\%) & 70 (73\%) & 24 (26\%) & 70 (68\%) & 40 (48\%) & 39 (41\%) & 89 (74\%) & 30 (42\%) & 70 (74\%) & 57 (61\%) \\
## Age (yrs) & & & & & & & & & & \\
## 13 & 3 (3.0\%) & 7 (7.3\%) & 4 (4.3\%) & 1 (1.0\%) & 3 (3.6\%) & 10 (11\%) & 9 (7.5\%) & 7 (9.7\%) & 7 (7.4\%) & 5 (5.3\%) \\
## 14 & 15 (15\%) & 17 (18\%) & 11 (12\%) & 18 (17\%) & 15 (18\%) & 9 (9.5\%) & 15 (13\%) & 13 (18\%) & 13 (14\%) & 14 (15\%) \\
## 15 & 39 (39\%) & 28 (29\%) & 28 (30\%) & 34 (33\%) & 25 (30\%) & 31 (33\%) & 37 (31\%) & 23 (32\%) & 32 (34\%) & 35 (37\%) \\
## 16 & 44 (44\%) & 44 (46\%) & 51 (54\%) & 50 (49\%) & 40 (48\%) & 45 (47\%) & 59 (49\%) & 29 (40\%) & 42 (45\%) & 40 (43\%) \\
## Biological sex & & & & & & & & & & \\
## Female & 58 (57\%) & 96 (100\%) & 77 (82\%) & 94 (91\%) & 78 (94\%) & 87 (92\%) & 113 (94\%) & 71 (99\%) & 87 (93\%) & 91 (97\%) \\
## Male & 43 (43\%) & 0 (0\%) & 17 (18\%) & 9 (8.7\%) & 5 (6.0\%) & 8 (8.4\%) & 7 (5.8\%) & 1 (1.4\%) & 7 (7.4\%) & 3 (3.2\%) \\
## Sexual orientation & & & & & & & & & & \\
## Heterosexual & 86 (85\%) & 0 (0\%) & 44 (47\%) & 29 (28\%) & 27 (33\%) & 18 (19\%) & 7 (5.8\%) & 3 (4.2\%) & 0 (0\%) & 0 (0\%) \\
## LGBTQ & 13 (13\%) & 66 (69\%) & 45 (48\%) & 66 (64\%) & 51 (61\%) & 63 (66\%) & 87 (73\%) & 58 (81\%) & 76 (81\%) & 75 (80\%) \\
## Other & 2 (2.0\%) & 30 (31\%) & 5 (5.3\%) & 8 (7.8\%) & 5 (6.0\%) & 14 (15\%) & 26 (22\%) & 11 (15\%) & 18 (19\%) & 19 (20\%) \\
## Language & & & & & & & & & & \\
## English & 94 (93\%) & 94 (98\%) & 90 (96\%) & 100 (97\%) & 80 (96\%) & 92 (97\%) & 119 (99\%) & 70 (97\%) & 94 (100\%) & 93 (99\%) \\
## Other & 7 (6.9\%) & 2 (2.1\%) & 4 (4.3\%) & 3 (2.9\%) & 3 (3.6\%) & 3 (3.2\%) & 1 (0.8\%) & 2 (2.8\%) & 0 (0\%) & 1 (1.1\%) \\
## Gender identity & & & & & & & & & & \\
## Non-binary & 0 (0\%) & 91 (95\%) & 0 (0\%) & 2 (1.9\%) & 0 (0\%) & 2 (2.1\%) & 3 (2.5\%) & 3 (4.2\%) & 20 (21\%) & 67 (71\%) \\
## Women/girls & 61 (60\%) & 0 (0\%) & 77 (82\%) & 93 (90\%) & 78 (94\%) & 82 (86\%) & 95 (79\%) & 61 (85\%) & 55 (59\%) & 17 (18\%) \\
## Male/Masculine & 40 (40\%) & 5 (5.2\%) & 17 (18\%) & 8 (7.8\%) & 5 (6.0\%) & 11 (12\%) & 22 (18\%) & 8 (11\%) & 19 (20\%) & 10 (11\%) \\
## Number of challenges & & & & & & & & & & \\
## 0 & 10 (9.9\%) & 20 (21\%) & 27 (29\%) & 4 (3.9\%) & 37 (45\%) & 20 (21\%) & 15 (13\%) & 25 (35\%) & 11 (12\%) & 14 (15\%) \\
## 1 & 78 (77\%) & 39 (41\%) & 56 (60\%) & 77 (75\%) & 37 (45\%) & 48 (51\%) & 85 (71\%) & 30 (42\%) & 58 (62\%) & 43 (46\%) \\
## $>$=2 & 13 (13\%) & 37 (39\%) & 11 (12\%) & 22 (21\%) & 9 (11\%) & 27 (28\%) & 20 (17\%) & 17 (24\%) & 25 (27\%) & 37 (39\%) \\
## Type of challenges & & & & & & & & & & \\
## No impact & 10 (9.9\%) & 20 (21\%) & 27 (29\%) & 4 (3.9\%) & 37 (45\%) & 20 (21\%) & 15 (13\%) & 25 (35\%) & 11 (12\%) & 14 (15\%) \\
## Other & 11 (11\%) & 37 (39\%) & 10 (11\%) & 21 (20\%) & 9 (11\%) & 37 (39\%) & 19 (16\%) & 19 (26\%) & 39 (41\%) & 41 (44\%) \\
## School & 80 (79\%) & 39 (41\%) & 57 (61\%) & 78 (76\%) & 37 (45\%) & 38 (40\%) & 86 (72\%) & 28 (39\%) & 44 (47\%) & 39 (41\%) \\
## Type of coping strategies & & & & & & & & & & \\
## Combined & 0 (0\%) & 31 (32\%) & 0 (0\%) & 0 (0\%) & 2 (2.4\%) & 1 (1.1\%) & 8 (6.7\%) & 15 (21\%) & 29 (31\%) & 25 (27\%) \\
## No action & 16 (16\%) & 65 (68\%) & 28 (30\%) & 33 (32\%) & 36 (43\%) & 63 (66\%) & 95 (79\%) & 49 (68\%) & 50 (53\%) & 29 (31\%) \\
## Positive & 85 (84\%) & 0 (0\%) & 66 (70\%) & 70 (68\%) & 45 (54\%) & 31 (33\%) & 17 (14\%) & 8 (11\%) & 15 (16\%) & 40 (43\%) \\
## person\_ate & & & & & & & & & & \\
## -0.0689513014845681 & 101 (100\%) & 96 (100\%) & 94 (100\%) & 103 (100\%) & 83 (100\%) & 95 (100\%) & 120 (100\%) & 72 (100\%) & 94 (100\%) & 94 (100\%) \\
## person\_hte & -0.10 (0.01) & -0.03 (0.01) & -0.09 (0.00) & -0.08 (0.00) & -0.08 (0.00) & -0.07 (0.00) & -0.07 (0.00) & -0.06 (0.00) & -0.06 (0.00) & -0.04 (0.00) \\
## lp\_person & 0.84 (0.03) & 1.17 (0.03) & 0.89 (0.01) & 0.93 (0.01) & 0.94 (0.01) & 0.96 (0.01) & 1.00 (0.01) & 1.01 (0.01) & 1.05 (0.01) & 1.10 (0.02) \\
## \hline
## \end{tabular}
## \end{table}
6 Sensitivity Analysis
6.1 inverse probability of missingness weighting (IPMW)
Check the missingness, and regress the missing status on variables with complete info.
model_dat <- cope_subset3 %>%
dplyr::select(-family_num,-b_response_id,-recode_language,-b_screener_age)
## check missingness
model_dat$missing[complete.cases(model_dat)] <- 0
model_dat$missing[!complete.cases(model_dat)] <- 1
model_dat$missing %>% table()## .
## 0 1
## 1441 60
## tibble [1,501 × 10] (S3: tbl_df/tbl/data.frame)
## $ condition : chr [1:1501] "Placebo Control" "Project Personality" "Placebo Control" "Placebo Control" ...
## $ b_cdi_mean : num [1:1501] 1.417 0.917 1.167 1.333 0.917 ...
## $ f1_cdi_mean : num [1:1501] 1 0.583 0.583 1.417 0.5 ...
## $ recode_race : Factor w/ 5 levels "Asian Including Asian Desi",..: 5 5 5 5 5 5 1 1 5 4 ...
## $ b_dem_sex : Factor w/ 2 levels "Female","Male": 1 1 1 1 1 1 1 1 1 1 ...
## $ recode_orient: Factor w/ 3 levels "Heterosexual",..: 2 3 2 2 2 2 1 2 1 2 ...
## $ genderid3 : Factor w/ 3 levels "1","2","3": 1 1 2 2 2 3 2 2 2 2 ...
## $ family_cat : Factor w/ 3 levels "No impact","Other",..: 2 2 1 3 3 3 3 3 3 2 ...
## $ cope3 : Factor w/ 3 levels "1","2","3": 3 3 3 2 1 2 2 2 2 2 ...
## $ missing : num [1:1501] 0 0 0 0 0 0 0 0 0 0 ...
missed <- glm(missing ~ genderid3+b_cdi_mean+family_cat+cope3,
data = model_dat, family = "binomial")
model_dat$fitted = missed$fitted.values
model_dat$IPCW = (1-mean(model_dat$missing))/(1-model_dat$fitted)
model_dat <- subset(model_dat, complete.cases(model_dat))A summary of the IPCW model:
##
## Call:
## glm(formula = missing ~ genderid3 + b_cdi_mean + family_cat +
## cope3, family = "binomial", data = model_dat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.0919 0.8430 -3.668 0.000245 ***
## genderid32 -1.2368 0.2870 -4.309 1.64e-05 ***
## genderid33 -1.2284 0.4630 -2.653 0.007978 **
## b_cdi_mean 0.2561 0.4193 0.611 0.541408
## family_catOther -0.1730 0.3659 -0.473 0.636328
## family_catSchool -0.5263 0.3270 -1.610 0.107490
## cope32 0.9205 0.5452 1.688 0.091346 .
## cope33 0.7123 0.5729 1.243 0.213729
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 503.91 on 1500 degrees of freedom
## Residual deviance: 476.46 on 1493 degrees of freedom
## AIC: 492.46
##
## Number of Fisher Scoring iterations: 6
Summary of IPCW weights:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.9687 0.9815 0.9878 1.0000 1.0002 1.1427
6.2 Baseline prediction models
ipcw_abc_dat <- model_dat %>% filter(condition!="Project Personality") %>%## ABC vs control
mutate(condition = as.character(condition),
condition = as.factor(condition)
)%>% dplyr::select(-condition,-b_cdi_mean,-fitted,-IPCW)
ipcw_person_dat <- model_dat %>%
filter(condition!="Project ABC") %>% ## Personality vs control
mutate(condition = as.character(condition),
condition = as.factor(condition)
)%>% dplyr::select(-condition,-b_cdi_mean,-fitted,-IPCW)
abc_weight <- ipcw_abc_dat$IPCW
person_weight <- ipcw_person_dat$IPCW
abc_ipcw_base_m <- glm(f1_cdi_mean ~.,
family =gaussian(link="identity"),
weights =abc_weight,
data = ipcw_abc_dat)
person_ipcw_base_m <- glm(f1_cdi_mean ~.,
family =gaussian(link="identity"),
weights =person_weight,
data = ipcw_person_dat)
## baseline linear pred values
abc_ipcw_lp <- predict(abc_ipcw_base_m, newdata = ipcw_abc_dat)
person_ipcw_lp <- predict(person_ipcw_base_m, newdata = ipcw_person_dat)
lp_abc <- abc_ipcw_lp
lp_person <- person_ipcw_lpbaseline model for Project ABC:
| f 1 cdi mean | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 1.06 | 0.92 – 1.21 | <0.001 |
|
recode race [Black/African-American] |
-0.04 | -0.16 – 0.08 | 0.527 |
|
recode race [Hispanic/Latinx] |
-0.08 | -0.19 – 0.02 | 0.128 |
| recode race [Mixed] | -0.03 | -0.13 – 0.08 | 0.617 |
| recode race [White] | 0.03 | -0.05 – 0.12 | 0.462 |
| b dem sex [Male] | -0.15 | -0.25 – -0.04 | 0.006 |
| recode orient [LGBTQ] | 0.09 | 0.02 – 0.16 | 0.010 |
| recode orient [Other] | 0.12 | 0.03 – 0.22 | 0.008 |
| genderid3 [2] | -0.13 | -0.20 – -0.06 | <0.001 |
| genderid3 [3] | -0.02 | -0.12 – 0.08 | 0.687 |
| family cat [Other] | 0.07 | -0.01 – 0.15 | 0.070 |
| family cat [School] | 0.03 | -0.03 – 0.10 | 0.313 |
| cope3 [2] | -0.06 | -0.14 – 0.02 | 0.137 |
| cope3 [3] | -0.16 | -0.24 – -0.08 | <0.001 |
| Observations | 977 | ||
| R2 | 0.079 | ||
baseline model for Project personality:
| f 1 cdi mean | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 1.16 | 1.00 – 1.31 | <0.001 |
|
recode race [Black/African-American] |
-0.06 | -0.18 – 0.07 | 0.387 |
|
recode race [Hispanic/Latinx] |
-0.05 | -0.16 – 0.06 | 0.382 |
| recode race [Mixed] | -0.03 | -0.14 – 0.08 | 0.552 |
| recode race [White] | -0.00 | -0.09 – 0.09 | 0.937 |
| b dem sex [Male] | -0.11 | -0.22 – 0.01 | 0.065 |
| recode orient [LGBTQ] | 0.07 | 0.00 – 0.14 | 0.046 |
| recode orient [Other] | 0.06 | -0.03 – 0.16 | 0.182 |
| genderid3 [2] | -0.15 | -0.22 – -0.08 | <0.001 |
| genderid3 [3] | -0.10 | -0.21 – -0.00 | 0.049 |
| family cat [Other] | 0.02 | -0.06 – 0.10 | 0.625 |
| family cat [School] | -0.02 | -0.09 – 0.05 | 0.664 |
| cope3 [2] | -0.06 | -0.15 – 0.03 | 0.193 |
| cope3 [3] | -0.13 | -0.22 – -0.04 | 0.005 |
| Observations | 952 | ||
| R2 | 0.052 | ||
6.3 Main effect model
ipcw_abc_dat <- model_dat %>% filter(condition!="Project Personality") %>%## ABC vs control
mutate(condition = as.character(condition),
condition = as.factor(condition)
)%>% dplyr::select(-fitted,-IPCW)
ipcw_person_dat <- model_dat %>%
filter(condition!="Project ABC") %>% ## Personality vs control
mutate(condition = as.character(condition),
condition = as.factor(condition)
)%>% dplyr::select(-fitted,-IPCW)
abc_ipcw_main_m <- glm(f1_cdi_mean ~ b_cdi_mean + condition,
family =gaussian(link="identity"),
weights =abc_weight,
data = ipcw_abc_dat)
person_ipcw_main_m <- glm(f1_cdi_mean ~b_cdi_mean + condition ,
family =gaussian(link="identity"),
weights =person_weight,
data = ipcw_person_dat)Main effect model for Project ABC:
| f 1 cdi mean | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 0.27 | 0.19 – 0.36 | <0.001 |
| b cdi mean | 0.64 | 0.58 – 0.71 | <0.001 |
| condition [Project ABC] | -0.08 | -0.12 – -0.04 | <0.001 |
| Observations | 977 | ||
| R2 | 0.292 | ||
Main effect model for Project Personality:
| f 1 cdi mean | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 0.31 | 0.23 – 0.39 | <0.001 |
| b cdi mean | 0.61 | 0.55 – 0.68 | <0.001 |
|
condition [Project Personality] |
-0.07 | -0.11 – -0.02 | 0.003 |
| Observations | 952 | ||
| R2 | 0.266 | ||
6.4 The HTE models
abc_ipcw_hte_m <- glm(f1_cdi_mean ~b_cdi_mean + condition * abc_ipcw_lp,
family =gaussian(link="identity"),
weights =abc_weight,
data = ipcw_abc_dat)
person_ipcw_hte_m <- glm(f1_cdi_mean ~b_cdi_mean + condition * person_ipcw_lp,
family =gaussian(link="identity"),
weights =person_weight,
data = ipcw_person_dat)HTE model for Project ABC:
| f 1 cdi mean | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 0.21 | -0.05 – 0.47 | 0.120 |
| b cdi mean | 0.60 | 0.53 – 0.67 | <0.001 |
| condition [Project ABC] | -0.46 | -0.83 – -0.10 | 0.013 |
| abc ipcw lp | 0.12 | -0.15 – 0.39 | 0.393 |
|
condition [Project ABC] × abc ipcw lp |
0.39 | 0.02 – 0.77 | 0.039 |
| Observations | 977 | ||
| R2 | 0.302 | ||
HTE model for Project Personality:
| f 1 cdi mean | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 0.19 | -0.14 – 0.52 | 0.258 |
| b cdi mean | 0.59 | 0.52 – 0.66 | <0.001 |
|
condition [Project Personality] |
-0.28 | -0.76 – 0.19 | 0.242 |
| person ipcw lp | 0.15 | -0.19 – 0.49 | 0.383 |
|
condition [Project Personality] × person ipcw lp |
0.22 | -0.26 – 0.70 | 0.373 |
| Observations | 952 | ||
| R2 | 0.270 | ||
## rename variabels
abc_main_m <- abc_ipcw_main_m
person_main_m <- person_ipcw_main_m
abc_hte_m <- abc_ipcw_hte_m
person_hte_m <- person_ipcw_hte_m
## cATE/HTE
## project ABC vs control
hte_dat <- ipcw_abc_dat
hte_dat$condition = 'Placebo Control'
abc_hte_ctrl <- predict(abc_hte_m,hte_dat)
hte_dat$condition = 'Project ABC'
abc_hte_trt <- predict(abc_hte_m,hte_dat)
abc_hte <- abc_hte_trt-abc_hte_ctrl
## hte personality vs control
hte_dat <- ipcw_person_dat
hte_dat$condition = 'Placebo Control'
person_hte_ctrl <- predict(person_hte_m,hte_dat)
hte_dat$condition = 'Project Personality'
person_hte_trt <- predict(person_hte_m,hte_dat)
person_hte <- person_hte_trt-person_hte_ctrl
point_df <- data.frame(Comparsion= c("Project ABC vs Control","Project personality vs Control" ),
ATE = c(abc_ate,person_ate),
SE = c(summary(abc_main_m)$coefficients[, "Std. Error"][3],
summary(person_main_m)$coefficients[, "Std. Error"][3])
)
kable(point_df, caption = "summary of avearged treatment effect")| Comparsion | ATE | SE | |
|---|---|---|---|
| conditionProject ABC | Project ABC vs Control | -0.0795347 | 0.0223413 |
| conditionProject Personality | Project personality vs Control | -0.0689513 | 0.0233696 |
6.5 Risk stratification
## calibration plot
abc_ate <- coef(abc_main_m)[3]
person_ate <- coef(person_main_m)[3]
person_cal_dat <- data.frame(person_ate,
person_hte,
lp_person)
person_cal_dat$quantile_grp <- cut(person_cal_dat$lp_person,
breaks = quantile(person_cal_dat$lp_person, probs = seq(0, 1, by = 0.2)),
include.lowest = TRUE,
labels = FALSE)
## compute the averaged hte effect in each quantile risk grp
person_avg_hte <- person_cal_dat %>%
group_by(quantile_grp) %>%
summarise(person_avg_hte = mean(person_hte, na.rm = TRUE))6.6 Boot CI
## for ipcw data
## ipcw_person_dat
boot2_f <- function(mydat=NULL,seed=1017){
set.seed(seed)
abc_boot_res <- NULL
for (i in 1:1000){
boot.idx <- sample(1:dim(mydat)[1], size = dim(mydat)[1], replace = T)
boot.data <- mydat[boot.idx,]
abc_main_dat <- boot.data %>% dplyr::select(f1_cdi_mean,condition,b_cdi_mean)
abc_main_m <- lm(f1_cdi_mean~ b_cdi_mean + condition,data=abc_main_dat)
lp_abc <- predict(abc_main_m,boot.data)
abc_ate <- coef(abc_main_m)[3]
abc_hte_m <- glm(f1_cdi_mean ~b_cdi_mean + condition*lp_abc,
family =gaussian(link="identity"),
weights =boot.data$IPCW,
data = boot.data)
## cATE/HTE
## project ABC vs control
hte_dat <- boot.data
trt_levels <- unique(boot.data$condition) %>% as.character()
trt <- trt_levels[!(trt_levels %in% 'Placebo Control')]
hte_dat$condition = 'Placebo Control'
abc_hte_ctrl <- predict(abc_hte_m,hte_dat)
hte_dat$condition = trt
abc_hte_trt <- predict(abc_hte_m,hte_dat)
abc_hte <- abc_hte_trt-abc_hte_ctrl
## calibration plot
abc_cal_dat <- data.frame(abc_ate,
abc_hte,
lp_abc)
abc_cal_dat$quantile_grp <- cut(abc_cal_dat$lp_abc,
breaks = quantile(abc_cal_dat$lp_abc, probs = seq(0, 1, by = 0.2)),
include.lowest = TRUE,
labels = FALSE)
## compute the averaged hte effect in each quantile risk grp
avg_hte <- abc_cal_dat %>%
group_by(quantile_grp) %>%
summarise(avg_hte = mean(abc_hte, na.rm = TRUE))
abc_boot_res <- rbind(abc_boot_res,avg_hte)
}
return(abc_boot_res)
}For Project ABC:
## compute bootstrapped CI
abc_boot_res <- boot2_f(mydat = ipcw_abc_dat)
abc_boot_cali_ci <- abc_boot_res %>%
group_by(quantile_grp) %>%
summarise(
lower_quantile = round(quantile(avg_hte, probs = 0.025), 4),
upper_quantile = round(quantile(avg_hte, probs = 0.975), 4)
)
abc_cali_result <- left_join(abc_avg_hte,abc_boot_cali_ci)For Project personality:
person_boot_res <- boot2_f(mydat = dat_person)
person_boot_cali_ci <- person_boot_res %>%
group_by(quantile_grp) %>%
summarise(
lower_quantile = round(quantile(avg_hte, probs = 0.025), 4),
upper_quantile = round(quantile(avg_hte, probs = 0.975), 4)
)
person_cali_result <- left_join(person_avg_hte,person_boot_cali_ci)
## identify the bounds for cali plots
max_cali_y <- max(abc_cali_result$upper_quantile,person_cali_result$upper_quantile)
min_cali_y <- min(abc_cali_result$lower_quantile,person_cali_result$lower_quantile)
## check if the ylims are correct
### I used -0.2 and 0.5 range for the y axis in the calibration plots
logic1 <- min_cali_y <= 0.05 & min_cali_y >=-0.2
logic2 <- max_cali_y <= 0.05 & max_cali_y >=-0.2
if(!logic1 & ! logic2){
print(paste0("please make sure the range of y axis in the following calibration plot is [", min_cali_y, ",",max_cali_y,']'))
}else{logic_3 = logic1+logic2}6.7 Risk stratification
abc_cali_plot <- ggplot(abc_cali_result, aes(x = as.factor(quantile_grp), y = abc_avg_hte)) +
geom_point(size = 2.5,color = "#8B0000") +
# geom_line(aes(group = 1), color = "blue") +
geom_errorbar(aes(ymin = lower_quantile, ymax = upper_quantile), width = 0.05, color = "#8B0000") +
labs(x = "",
y = "Mean heter_mean_diff"
# title = ""
) +
geom_hline(yintercept = abc_ate, linetype = "dashed", color = "grey")+
theme_minimal()+
scale_y_continuous(
limits = c(-0.2, 0.05),
breaks = seq(-0.2, 0.05, by = 0.05)
)+
# ylim(c(min_cali_y,max_cali_y))+
ylab("Mean difference")+
# ylab(expression(atop("Risk Difference, %", atop("Harm" %<-% phantom(" ") %->% "Benefit")))) + # Custom y-axis label
theme(axis.title.y = element_text(angle = 90, vjust = 0.5, size = 14))abc_lp_plot <- ggplot(abc_cal_dat, aes(x = lp_abc)) +
geom_histogram(binwidth = 0.02, fill = "#8B0000") +
labs(x = "Predicted CDI mean score using baseline covariates (Project ABC)", y = NULL) +
theme_minimal()
abc_combo_plot <- cowplot::plot_grid(
abc_cali_plot,
abc_lp_plot,
ncol = 1,
align = "v",
rel_heights = c(3, 1)
)
# Display the combined plot
abc_combo_plot## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6620 0.8980 0.9743 0.9782 1.0605 1.2931
person_cali_plot <- ggplot(person_cali_result, aes(x = as.factor(quantile_grp), y = person_avg_hte)) +
geom_point(size = 2.5, color = "#8B0000") +
# geom_line(aes(group = 1), color = "blue") +
geom_errorbar(aes(ymin = lower_quantile, ymax = upper_quantile), width = 0.05, color = "#8B0000") +
labs(x = "",
y = "Mean heter_mean_diff"
# title = ""
) +
geom_hline(yintercept = person_ate, linetype = "dashed", color = "grey")+
theme_minimal()+
scale_y_continuous(
limits = c(-0.2, 0.05),
breaks = seq(-0.2, 0.05, by = 0.05)
)+
ylab("Mean difference")+
# ylab(expression(atop("Risk Difference, %", atop("Harm" %<-% phantom(" ") %->% "Benefit")))) + # Custom y-axis label
theme(axis.title.y = element_text(angle = 90, vjust = 0.5, size = 14))person_lp_plot <- ggplot(person_cal_dat, aes(x = lp_person)) +
geom_histogram(binwidth = 0.01, fill = "#8B0000") +
labs(x = "Predicted CDI mean score using baseline covariates(Project Personality)", y = NULL) +
theme_minimal()
person_combo_plot <- cowplot::plot_grid(
person_cali_plot,
person_lp_plot,
ncol = 1,
align = "v",
rel_heights = c(3, 1)
)
person_combo_plotDistribution of baseline depression severity score
abc_cal_dat_report <- abc_cal_dat
person_cal_dat_report <- person_cal_dat
abc_cal_dat_report$grp ="Project ABC"
person_cal_dat_report$grp = "Project Personality"
colnames(person_cal_dat_report) = colnames(abc_cal_dat_report)
combined_cal_dat_report<- rbind(abc_cal_dat_report,person_cal_dat_report)
combined_hist_plot <-ggplot(combined_cal_dat_report, aes(x = lp_abc, fill = grp)) +
geom_histogram(binwidth = 0.03, alpha = 0.8, position = "identity") +
labs(x = "Predicted 3-month CDI-SF mean score using baseline prediction models", y = "Frequency", fill = "Project") +
theme_minimal() +
scale_fill_manual(values = c("Project ABC" = "#1f78b4", "Project Personality" = "#8B0000"))
combined_hist_plot